From 93fa7636dfdc059b25df148f230c0991096afdef Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Tue, 8 Apr 2008 11:01:58 +0200 Subject: x86, ptrace: PEBS support Polish the ds.h interface and add support for PEBS. Ds.c is meant to be the resource allocator for per-thread and per-cpu BTS and PEBS recording. It is used by ptrace/utrace to provide execution tracing of debugged tasks. It will be used by profilers (e.g. perfmon2). It may be used by kernel debuggers to provide a kernel execution trace. Changes in detail: - guard DS and ptrace by CONFIG macros - separate DS and BTS more clearly - simplify field accesses - add functions to manage PEBS buffers - add simple protection/allocation mechanism - added support for Atom Opens: - buffer overflow handling Currently, only circular buffers are supported. This is all we need for debugging. Profilers would want an overflow notification. This is planned to be added when perfmon2 is made to use the ds.h interface. - utrace intermediate layer Signed-off-by: Markus Metzger Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 2ad6301..13f3f8b 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -415,3 +415,21 @@ config X86_MINIMUM_CPU_FAMILY config X86_DEBUGCTLMSR def_bool y depends on !(M586MMX || M586TSC || M586 || M486 || M386) + +config X86_DS + bool "Debug Store support" + default y + help + Add support for Debug Store. + This allows the kernel to provide a memory buffer to the hardware + to store various profiling and tracing events. + +config X86_PTRACE_BTS + bool "ptrace interface to Branch Trace Store" + default y + depends on (X86_DS && X86_DEBUGCTLMSR) + help + Add a ptrace interface to allow collecting an execution trace + of the traced task. + This collects control flow changes in a (cyclic) buffer and allows + debuggers to fill in the gaps and show an execution trace of the debuggee. diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index fe9224c..cbffa2a 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -222,10 +222,11 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_BTS); if (!(l1 & (1<<12))) set_cpu_cap(c, X86_FEATURE_PEBS); + ds_init_intel(c); } if (cpu_has_bts) - ds_init_intel(c); + ptrace_bts_init_intel(c); } static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 11c11b8..5b32b6d 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -2,26 +2,48 @@ * Debug Store support * * This provides a low-level interface to the hardware's Debug Store - * feature that is used for last branch recording (LBR) and + * feature that is used for branch trace store (BTS) and * precise-event based sampling (PEBS). * - * Different architectures use a different DS layout/pointer size. - * The below functions therefore work on a void*. + * It manages: + * - per-thread and per-cpu allocation of BTS and PEBS + * - buffer memory allocation (optional) + * - buffer overflow handling + * - buffer access * + * It assumes: + * - get_task_struct on all parameter tasks + * - current is allowed to trace parameter tasks * - * Since there is no user for PEBS, yet, only LBR (or branch - * trace store, BTS) is supported. * - * - * Copyright (C) 2007 Intel Corporation. - * Markus Metzger , Dec 2007 + * Copyright (C) 2007-2008 Intel Corporation. + * Markus Metzger , 2007-2008 */ + +#ifdef CONFIG_X86_DS + #include #include #include #include +#include + + +/* + * The configuration for a particular DS hardware implementation. + */ +struct ds_configuration { + /* the size of the DS structure in bytes */ + unsigned char sizeof_ds; + /* the size of one pointer-typed field in the DS structure in bytes; + this covers the first 8 fields related to buffer management. */ + unsigned char sizeof_field; + /* the size of a BTS/PEBS record in bytes */ + unsigned char sizeof_rec[2]; +}; +static struct ds_configuration ds_cfg; /* @@ -44,378 +66,747 @@ * (interrupt occurs when write pointer passes interrupt pointer) * - value to which counter is reset following counter overflow * - * On later architectures, the last branch recording hardware uses - * 64bit pointers even in 32bit mode. - * - * - * Branch Trace Store (BTS) records store information about control - * flow changes. They at least provide the following information: - * - source linear address - * - destination linear address + * Later architectures use 64bit pointers throughout, whereas earlier + * architectures use 32bit pointers in 32bit mode. * - * Netburst supported a predicated bit that had been dropped in later - * architectures. We do not suppor it. * + * We compute the base address for the first 8 fields based on: + * - the field size stored in the DS configuration + * - the relative field position + * - an offset giving the start of the respective region * - * In order to abstract from the actual DS and BTS layout, we describe - * the access to the relevant fields. - * Thanks to Andi Kleen for proposing this design. + * This offset is further used to index various arrays holding + * information for BTS and PEBS at the respective index. * - * The implementation, however, is not as general as it might seem. In - * order to stay somewhat simple and efficient, we assume an - * underlying unsigned type (mostly a pointer type) and we expect the - * field to be at least as big as that type. + * On later 32bit processors, we only access the lower 32bit of the + * 64bit pointer fields. The upper halves will be zeroed out. */ -/* - * A special from_ip address to indicate that the BTS record is an - * info record that needs to be interpreted or skipped. - */ -#define BTS_ESCAPE_ADDRESS (-1) +enum ds_field { + ds_buffer_base = 0, + ds_index, + ds_absolute_maximum, + ds_interrupt_threshold, +}; -/* - * A field access descriptor - */ -struct access_desc { - unsigned char offset; - unsigned char size; +enum ds_qualifier { + ds_bts = 0, + ds_pebs }; +static inline unsigned long ds_get(const unsigned char *base, + enum ds_qualifier qual, enum ds_field field) +{ + base += (ds_cfg.sizeof_field * (field + (4 * qual))); + return *(unsigned long *)base; +} + +static inline void ds_set(unsigned char *base, enum ds_qualifier qual, + enum ds_field field, unsigned long value) +{ + base += (ds_cfg.sizeof_field * (field + (4 * qual))); + (*(unsigned long *)base) = value; +} + + /* - * The configuration for a particular DS/BTS hardware implementation. + * Locking is done only for allocating BTS or PEBS resources and for + * guarding context and buffer memory allocation. + * + * Most functions require the current task to own the ds context part + * they are going to access. All the locking is done when validating + * access to the context. */ -struct ds_configuration { - /* the DS configuration */ - unsigned char sizeof_ds; - struct access_desc bts_buffer_base; - struct access_desc bts_index; - struct access_desc bts_absolute_maximum; - struct access_desc bts_interrupt_threshold; - /* the BTS configuration */ - unsigned char sizeof_bts; - struct access_desc from_ip; - struct access_desc to_ip; - /* BTS variants used to store additional information like - timestamps */ - struct access_desc info_type; - struct access_desc info_data; - unsigned long debugctl_mask; -}; +static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock); /* - * The global configuration used by the below accessor functions + * Validate that the current task is allowed to access the BTS/PEBS + * buffer of the parameter task. + * + * Returns 0, if access is granted; -Eerrno, otherwise. */ -static struct ds_configuration ds_cfg; +static inline int ds_validate_access(struct ds_context *context, + enum ds_qualifier qual) +{ + if (!context) + return -EPERM; + + if (context->owner[qual] == current) + return 0; + + return -EPERM; +} + /* - * Accessor functions for some DS and BTS fields using the above - * global ptrace_bts_cfg. + * We either support (system-wide) per-cpu or per-thread allocation. + * We distinguish the two based on the task_struct pointer, where a + * NULL pointer indicates per-cpu allocation for the current cpu. + * + * Allocations are use-counted. As soon as resources are allocated, + * further allocations must be of the same type (per-cpu or + * per-thread). We model this by counting allocations (i.e. the number + * of tracers of a certain type) for one type negatively: + * =0 no tracers + * >0 number of per-thread tracers + * <0 number of per-cpu tracers + * + * The below functions to get and put tracers and to check the + * allocation type require the ds_lock to be held by the caller. + * + * Tracers essentially gives the number of ds contexts for a certain + * type of allocation. */ -static inline unsigned long get_bts_buffer_base(char *base) +static long tracers; + +static inline void get_tracer(struct task_struct *task) { - return *(unsigned long *)(base + ds_cfg.bts_buffer_base.offset); + tracers += (task ? 1 : -1); } -static inline void set_bts_buffer_base(char *base, unsigned long value) + +static inline void put_tracer(struct task_struct *task) { - (*(unsigned long *)(base + ds_cfg.bts_buffer_base.offset)) = value; + tracers -= (task ? 1 : -1); } -static inline unsigned long get_bts_index(char *base) + +static inline int check_tracer(struct task_struct *task) { - return *(unsigned long *)(base + ds_cfg.bts_index.offset); + return (task ? (tracers >= 0) : (tracers <= 0)); } -static inline void set_bts_index(char *base, unsigned long value) + + +/* + * The DS context is either attached to a thread or to a cpu: + * - in the former case, the thread_struct contains a pointer to the + * attached context. + * - in the latter case, we use a static array of per-cpu context + * pointers. + * + * Contexts are use-counted. They are allocated on first access and + * deallocated when the last user puts the context. + * + * We distinguish between an allocating and a non-allocating get of a + * context: + * - the allocating get is used for requesting BTS/PEBS resources. It + * requires the caller to hold the global ds_lock. + * - the non-allocating get is used for all other cases. A + * non-existing context indicates an error. It acquires and releases + * the ds_lock itself for obtaining the context. + * + * A context and its DS configuration are allocated and deallocated + * together. A context always has a DS configuration of the + * appropriate size. + */ +static DEFINE_PER_CPU(struct ds_context *, system_context); + +#define this_system_context per_cpu(system_context, smp_processor_id()) + +/* + * Returns the pointer to the parameter task's context or to the + * system-wide context, if task is NULL. + * + * Increases the use count of the returned context, if not NULL. + */ +static inline struct ds_context *ds_get_context(struct task_struct *task) { - (*(unsigned long *)(base + ds_cfg.bts_index.offset)) = value; + struct ds_context *context; + + spin_lock(&ds_lock); + + context = (task ? task->thread.ds_ctx : this_system_context); + if (context) + context->count++; + + spin_unlock(&ds_lock); + + return context; } -static inline unsigned long get_bts_absolute_maximum(char *base) + +/* + * Same as ds_get_context, but allocates the context and it's DS + * structure, if necessary; returns NULL; if out of memory. + * + * pre: requires ds_lock to be held + */ +static inline struct ds_context *ds_alloc_context(struct task_struct *task) { - return *(unsigned long *)(base + ds_cfg.bts_absolute_maximum.offset); + struct ds_context **p_context = + (task ? &task->thread.ds_ctx : &this_system_context); + struct ds_context *context = *p_context; + + if (!context) { + context = kzalloc(sizeof(*context), GFP_KERNEL); + + if (!context) + return 0; + + context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL); + if (!context->ds) { + kfree(context); + return 0; + } + + *p_context = context; + + context->this = p_context; + context->task = task; + + if (task) + set_tsk_thread_flag(task, TIF_DS_AREA_MSR); + + if (!task || (task == current)) + wrmsr(MSR_IA32_DS_AREA, (unsigned long)context->ds, 0); + + get_tracer(task); + } + + context->count++; + + return context; } -static inline void set_bts_absolute_maximum(char *base, unsigned long value) + +/* + * Decreases the use count of the parameter context, if not NULL. + * Deallocates the context, if the use count reaches zero. + */ +static inline void ds_put_context(struct ds_context *context) { - (*(unsigned long *)(base + ds_cfg.bts_absolute_maximum.offset)) = value; + if (!context) + return; + + spin_lock(&ds_lock); + + if (--context->count) + goto out; + + *(context->this) = 0; + + if (context->task) + clear_tsk_thread_flag(context->task, TIF_DS_AREA_MSR); + + if (!context->task || (context->task == current)) + wrmsrl(MSR_IA32_DS_AREA, 0); + + put_tracer(context->task); + + /* free any leftover buffers from tracers that did not + * deallocate them properly. */ + kfree(context->buffer[ds_bts]); + kfree(context->buffer[ds_pebs]); + kfree(context->ds); + kfree(context); + out: + spin_unlock(&ds_lock); } -static inline unsigned long get_bts_interrupt_threshold(char *base) + + +/* + * Handle a buffer overflow + * + * task: the task whose buffers are overflowing; + * NULL for a buffer overflow on the current cpu + * context: the ds context + * qual: the buffer type + */ +static void ds_overflow(struct task_struct *task, struct ds_context *context, + enum ds_qualifier qual) { - return *(unsigned long *)(base + ds_cfg.bts_interrupt_threshold.offset); + if (!context) + return; + + if (context->callback[qual]) + (*context->callback[qual])(task); + + /* todo: do some more overflow handling */ } -static inline void set_bts_interrupt_threshold(char *base, unsigned long value) + + +/* + * Allocate a non-pageable buffer of the parameter size. + * Checks the memory and the locked memory rlimit. + * + * Returns the buffer, if successful; + * NULL, if out of memory or rlimit exceeded. + * + * size: the requested buffer size in bytes + * pages (out): if not NULL, contains the number of pages reserved + */ +static inline void *ds_allocate_buffer(size_t size, unsigned int *pages) { - (*(unsigned long *)(base + ds_cfg.bts_interrupt_threshold.offset)) = value; + unsigned long rlim, vm, pgsz; + void *buffer; + + pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT; + + rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT; + vm = current->mm->total_vm + pgsz; + if (rlim < vm) + return 0; + + rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; + vm = current->mm->locked_vm + pgsz; + if (rlim < vm) + return 0; + + buffer = kzalloc(size, GFP_KERNEL); + if (!buffer) + return 0; + + current->mm->total_vm += pgsz; + current->mm->locked_vm += pgsz; + + if (pages) + *pages = pgsz; + + return buffer; } -static inline unsigned long get_from_ip(char *base) + +static int ds_request(struct task_struct *task, void *base, size_t size, + ds_ovfl_callback_t ovfl, enum ds_qualifier qual) { - return *(unsigned long *)(base + ds_cfg.from_ip.offset); + struct ds_context *context; + unsigned long buffer, adj; + const unsigned long alignment = (1 << 3); + int error = 0; + + if (!ds_cfg.sizeof_ds) + return -EOPNOTSUPP; + + /* we require some space to do alignment adjustments below */ + if (size < (alignment + ds_cfg.sizeof_rec[qual])) + return -EINVAL; + + /* buffer overflow notification is not yet implemented */ + if (ovfl) + return -EOPNOTSUPP; + + + spin_lock(&ds_lock); + + if (!check_tracer(task)) + return -EPERM; + + error = -ENOMEM; + context = ds_alloc_context(task); + if (!context) + goto out_unlock; + + error = -EALREADY; + if (context->owner[qual] == current) + goto out_unlock; + error = -EPERM; + if (context->owner[qual] != 0) + goto out_unlock; + context->owner[qual] = current; + + spin_unlock(&ds_lock); + + + error = -ENOMEM; + if (!base) { + base = ds_allocate_buffer(size, &context->pages[qual]); + if (!base) + goto out_release; + + context->buffer[qual] = base; + } + error = 0; + + context->callback[qual] = ovfl; + + /* adjust the buffer address and size to meet alignment + * constraints: + * - buffer is double-word aligned + * - size is multiple of record size + * + * We checked the size at the very beginning; we have enough + * space to do the adjustment. + */ + buffer = (unsigned long)base; + + adj = ALIGN(buffer, alignment) - buffer; + buffer += adj; + size -= adj; + + size /= ds_cfg.sizeof_rec[qual]; + size *= ds_cfg.sizeof_rec[qual]; + + ds_set(context->ds, qual, ds_buffer_base, buffer); + ds_set(context->ds, qual, ds_index, buffer); + ds_set(context->ds, qual, ds_absolute_maximum, buffer + size); + + if (ovfl) { + /* todo: select a suitable interrupt threshold */ + } else + ds_set(context->ds, qual, + ds_interrupt_threshold, buffer + size + 1); + + /* we keep the context until ds_release */ + return error; + + out_release: + context->owner[qual] = 0; + ds_put_context(context); + return error; + + out_unlock: + spin_unlock(&ds_lock); + ds_put_context(context); + return error; } -static inline void set_from_ip(char *base, unsigned long value) + +int ds_request_bts(struct task_struct *task, void *base, size_t size, + ds_ovfl_callback_t ovfl) { - (*(unsigned long *)(base + ds_cfg.from_ip.offset)) = value; + return ds_request(task, base, size, ovfl, ds_bts); } -static inline unsigned long get_to_ip(char *base) + +int ds_request_pebs(struct task_struct *task, void *base, size_t size, + ds_ovfl_callback_t ovfl) { - return *(unsigned long *)(base + ds_cfg.to_ip.offset); + return ds_request(task, base, size, ovfl, ds_pebs); } -static inline void set_to_ip(char *base, unsigned long value) + +static int ds_release(struct task_struct *task, enum ds_qualifier qual) { - (*(unsigned long *)(base + ds_cfg.to_ip.offset)) = value; + struct ds_context *context; + int error; + + context = ds_get_context(task); + error = ds_validate_access(context, qual); + if (error < 0) + goto out; + + kfree(context->buffer[qual]); + context->buffer[qual] = 0; + + current->mm->total_vm -= context->pages[qual]; + current->mm->locked_vm -= context->pages[qual]; + context->pages[qual] = 0; + context->owner[qual] = 0; + + /* + * we put the context twice: + * once for the ds_get_context + * once for the corresponding ds_request + */ + ds_put_context(context); + out: + ds_put_context(context); + return error; } -static inline unsigned char get_info_type(char *base) + +int ds_release_bts(struct task_struct *task) { - return *(unsigned char *)(base + ds_cfg.info_type.offset); + return ds_release(task, ds_bts); } -static inline void set_info_type(char *base, unsigned char value) + +int ds_release_pebs(struct task_struct *task) { - (*(unsigned char *)(base + ds_cfg.info_type.offset)) = value; + return ds_release(task, ds_pebs); } -static inline unsigned long get_info_data(char *base) + +static int ds_get_index(struct task_struct *task, size_t *pos, + enum ds_qualifier qual) { - return *(unsigned long *)(base + ds_cfg.info_data.offset); + struct ds_context *context; + unsigned long base, index; + int error; + + context = ds_get_context(task); + error = ds_validate_access(context, qual); + if (error < 0) + goto out; + + base = ds_get(context->ds, qual, ds_buffer_base); + index = ds_get(context->ds, qual, ds_index); + + error = ((index - base) / ds_cfg.sizeof_rec[qual]); + if (pos) + *pos = error; + out: + ds_put_context(context); + return error; } -static inline void set_info_data(char *base, unsigned long value) + +int ds_get_bts_index(struct task_struct *task, size_t *pos) { - (*(unsigned long *)(base + ds_cfg.info_data.offset)) = value; + return ds_get_index(task, pos, ds_bts); } +int ds_get_pebs_index(struct task_struct *task, size_t *pos) +{ + return ds_get_index(task, pos, ds_pebs); +} -int ds_allocate(void **dsp, size_t bts_size_in_bytes) +static int ds_get_end(struct task_struct *task, size_t *pos, + enum ds_qualifier qual) { - size_t bts_size_in_records; - unsigned long bts; - void *ds; + struct ds_context *context; + unsigned long base, end; + int error; + + context = ds_get_context(task); + error = ds_validate_access(context, qual); + if (error < 0) + goto out; + + base = ds_get(context->ds, qual, ds_buffer_base); + end = ds_get(context->ds, qual, ds_absolute_maximum); + + error = ((end - base) / ds_cfg.sizeof_rec[qual]); + if (pos) + *pos = error; + out: + ds_put_context(context); + return error; +} - if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) - return -EOPNOTSUPP; +int ds_get_bts_end(struct task_struct *task, size_t *pos) +{ + return ds_get_end(task, pos, ds_bts); +} - if (bts_size_in_bytes < 0) - return -EINVAL; +int ds_get_pebs_end(struct task_struct *task, size_t *pos) +{ + return ds_get_end(task, pos, ds_pebs); +} - bts_size_in_records = - bts_size_in_bytes / ds_cfg.sizeof_bts; - bts_size_in_bytes = - bts_size_in_records * ds_cfg.sizeof_bts; +static int ds_access(struct task_struct *task, size_t index, + const void **record, enum ds_qualifier qual) +{ + struct ds_context *context; + unsigned long base, idx; + int error; - if (bts_size_in_bytes <= 0) + if (!record) return -EINVAL; - bts = (unsigned long)kzalloc(bts_size_in_bytes, GFP_KERNEL); + context = ds_get_context(task); + error = ds_validate_access(context, qual); + if (error < 0) + goto out; - if (!bts) - return -ENOMEM; + base = ds_get(context->ds, qual, ds_buffer_base); + idx = base + (index * ds_cfg.sizeof_rec[qual]); - ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL); + error = -EINVAL; + if (idx > ds_get(context->ds, qual, ds_absolute_maximum)) + goto out; - if (!ds) { - kfree((void *)bts); - return -ENOMEM; - } - - set_bts_buffer_base(ds, bts); - set_bts_index(ds, bts); - set_bts_absolute_maximum(ds, bts + bts_size_in_bytes); - set_bts_interrupt_threshold(ds, bts + bts_size_in_bytes + 1); - - *dsp = ds; - return 0; + *record = (const void *)idx; + error = ds_cfg.sizeof_rec[qual]; + out: + ds_put_context(context); + return error; } -int ds_free(void **dsp) +int ds_access_bts(struct task_struct *task, size_t index, const void **record) { - if (*dsp) { - kfree((void *)get_bts_buffer_base(*dsp)); - kfree(*dsp); - *dsp = NULL; - } - return 0; + return ds_access(task, index, record, ds_bts); } -int ds_get_bts_size(void *ds) +int ds_access_pebs(struct task_struct *task, size_t index, const void **record) { - int size_in_bytes; - - if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) - return -EOPNOTSUPP; - - if (!ds) - return 0; - - size_in_bytes = - get_bts_absolute_maximum(ds) - - get_bts_buffer_base(ds); - return size_in_bytes; + return ds_access(task, index, record, ds_pebs); } -int ds_get_bts_end(void *ds) +static int ds_write(struct task_struct *task, const void *record, size_t size, + enum ds_qualifier qual, int force) { - int size_in_bytes = ds_get_bts_size(ds); - - if (size_in_bytes <= 0) - return size_in_bytes; + struct ds_context *context; + int error; - return size_in_bytes / ds_cfg.sizeof_bts; -} + if (!record) + return -EINVAL; -int ds_get_bts_index(void *ds) -{ - int index_offset_in_bytes; + error = -EPERM; + context = ds_get_context(task); + if (!context) + goto out; - if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) - return -EOPNOTSUPP; + if (!force) { + error = ds_validate_access(context, qual); + if (error < 0) + goto out; + } - index_offset_in_bytes = - get_bts_index(ds) - - get_bts_buffer_base(ds); + error = 0; + while (size) { + unsigned long base, index, end, write_end, int_th; + unsigned long write_size, adj_write_size; + + /* + * write as much as possible without producing an + * overflow interrupt. + * + * interrupt_threshold must either be + * - bigger than absolute_maximum or + * - point to a record between buffer_base and absolute_maximum + * + * index points to a valid record. + */ + base = ds_get(context->ds, qual, ds_buffer_base); + index = ds_get(context->ds, qual, ds_index); + end = ds_get(context->ds, qual, ds_absolute_maximum); + int_th = ds_get(context->ds, qual, ds_interrupt_threshold); + + write_end = min(end, int_th); + + /* if we are already beyond the interrupt threshold, + * we fill the entire buffer */ + if (write_end <= index) + write_end = end; + + if (write_end <= index) + goto out; + + write_size = min((unsigned long) size, write_end - index); + memcpy((void *)index, record, write_size); + + record = (const char *)record + write_size; + size -= write_size; + error += write_size; + + adj_write_size = write_size / ds_cfg.sizeof_rec[qual]; + adj_write_size *= ds_cfg.sizeof_rec[qual]; + + /* zero out trailing bytes */ + memset((char *)index + write_size, 0, + adj_write_size - write_size); + index += adj_write_size; + + if (index >= end) + index = base; + ds_set(context->ds, qual, ds_index, index); + + if (index >= int_th) + ds_overflow(task, context, qual); + } - return index_offset_in_bytes / ds_cfg.sizeof_bts; + out: + ds_put_context(context); + return error; } -int ds_set_overflow(void *ds, int method) +int ds_write_bts(struct task_struct *task, const void *record, size_t size) { - switch (method) { - case DS_O_SIGNAL: - return -EOPNOTSUPP; - case DS_O_WRAP: - return 0; - default: - return -EINVAL; - } + return ds_write(task, record, size, ds_bts, /* force = */ 0); } -int ds_get_overflow(void *ds) +int ds_write_pebs(struct task_struct *task, const void *record, size_t size) { - return DS_O_WRAP; + return ds_write(task, record, size, ds_pebs, /* force = */ 0); } -int ds_clear(void *ds) +int ds_unchecked_write_bts(struct task_struct *task, + const void *record, size_t size) { - int bts_size = ds_get_bts_size(ds); - unsigned long bts_base; - - if (bts_size <= 0) - return bts_size; - - bts_base = get_bts_buffer_base(ds); - memset((void *)bts_base, 0, bts_size); - - set_bts_index(ds, bts_base); - return 0; + return ds_write(task, record, size, ds_bts, /* force = */ 1); } -int ds_read_bts(void *ds, int index, struct bts_struct *out) +int ds_unchecked_write_pebs(struct task_struct *task, + const void *record, size_t size) { - void *bts; + return ds_write(task, record, size, ds_pebs, /* force = */ 1); +} - if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) - return -EOPNOTSUPP; +static int ds_reset_or_clear(struct task_struct *task, + enum ds_qualifier qual, int clear) +{ + struct ds_context *context; + unsigned long base, end; + int error; - if (index < 0) - return -EINVAL; + context = ds_get_context(task); + error = ds_validate_access(context, qual); + if (error < 0) + goto out; - if (index >= ds_get_bts_size(ds)) - return -EINVAL; + base = ds_get(context->ds, qual, ds_buffer_base); + end = ds_get(context->ds, qual, ds_absolute_maximum); - bts = (void *)(get_bts_buffer_base(ds) + (index * ds_cfg.sizeof_bts)); + if (clear) + memset((void *)base, 0, end - base); - memset(out, 0, sizeof(*out)); - if (get_from_ip(bts) == BTS_ESCAPE_ADDRESS) { - out->qualifier = get_info_type(bts); - out->variant.jiffies = get_info_data(bts); - } else { - out->qualifier = BTS_BRANCH; - out->variant.lbr.from_ip = get_from_ip(bts); - out->variant.lbr.to_ip = get_to_ip(bts); - } + ds_set(context->ds, qual, ds_index, base); - return sizeof(*out);; + error = 0; + out: + ds_put_context(context); + return error; } -int ds_write_bts(void *ds, const struct bts_struct *in) +int ds_reset_bts(struct task_struct *task) { - unsigned long bts; - - if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) - return -EOPNOTSUPP; - - if (ds_get_bts_size(ds) <= 0) - return -ENXIO; + return ds_reset_or_clear(task, ds_bts, /* clear = */ 0); +} - bts = get_bts_index(ds); +int ds_reset_pebs(struct task_struct *task) +{ + return ds_reset_or_clear(task, ds_pebs, /* clear = */ 0); +} - memset((void *)bts, 0, ds_cfg.sizeof_bts); - switch (in->qualifier) { - case BTS_INVALID: - break; +int ds_clear_bts(struct task_struct *task) +{ + return ds_reset_or_clear(task, ds_bts, /* clear = */ 1); +} - case BTS_BRANCH: - set_from_ip((void *)bts, in->variant.lbr.from_ip); - set_to_ip((void *)bts, in->variant.lbr.to_ip); - break; +int ds_clear_pebs(struct task_struct *task) +{ + return ds_reset_or_clear(task, ds_pebs, /* clear = */ 1); +} - case BTS_TASK_ARRIVES: - case BTS_TASK_DEPARTS: - set_from_ip((void *)bts, BTS_ESCAPE_ADDRESS); - set_info_type((void *)bts, in->qualifier); - set_info_data((void *)bts, in->variant.jiffies); - break; +int ds_get_pebs_reset(struct task_struct *task, u64 *value) +{ + struct ds_context *context; + int error; - default: + if (!value) return -EINVAL; - } - bts = bts + ds_cfg.sizeof_bts; - if (bts >= get_bts_absolute_maximum(ds)) - bts = get_bts_buffer_base(ds); - set_bts_index(ds, bts); + context = ds_get_context(task); + error = ds_validate_access(context, ds_pebs); + if (error < 0) + goto out; - return ds_cfg.sizeof_bts; + *value = *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)); + + error = 0; + out: + ds_put_context(context); + return error; } -unsigned long ds_debugctl_mask(void) +int ds_set_pebs_reset(struct task_struct *task, u64 value) { - return ds_cfg.debugctl_mask; -} + struct ds_context *context; + int error; -#ifdef __i386__ -static const struct ds_configuration ds_cfg_netburst = { - .sizeof_ds = 9 * 4, - .bts_buffer_base = { 0, 4 }, - .bts_index = { 4, 4 }, - .bts_absolute_maximum = { 8, 4 }, - .bts_interrupt_threshold = { 12, 4 }, - .sizeof_bts = 3 * 4, - .from_ip = { 0, 4 }, - .to_ip = { 4, 4 }, - .info_type = { 4, 1 }, - .info_data = { 8, 4 }, - .debugctl_mask = (1<<2)|(1<<3) -}; + context = ds_get_context(task); + error = ds_validate_access(context, ds_pebs); + if (error < 0) + goto out; -static const struct ds_configuration ds_cfg_pentium_m = { - .sizeof_ds = 9 * 4, - .bts_buffer_base = { 0, 4 }, - .bts_index = { 4, 4 }, - .bts_absolute_maximum = { 8, 4 }, - .bts_interrupt_threshold = { 12, 4 }, - .sizeof_bts = 3 * 4, - .from_ip = { 0, 4 }, - .to_ip = { 4, 4 }, - .info_type = { 4, 1 }, - .info_data = { 8, 4 }, - .debugctl_mask = (1<<6)|(1<<7) + *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)) = value; + + error = 0; + out: + ds_put_context(context); + return error; +} + +static const struct ds_configuration ds_cfg_var = { + .sizeof_ds = sizeof(long) * 12, + .sizeof_field = sizeof(long), + .sizeof_rec[ds_bts] = sizeof(long) * 3, + .sizeof_rec[ds_pebs] = sizeof(long) * 10 }; -#endif /* _i386_ */ - -static const struct ds_configuration ds_cfg_core2 = { - .sizeof_ds = 9 * 8, - .bts_buffer_base = { 0, 8 }, - .bts_index = { 8, 8 }, - .bts_absolute_maximum = { 16, 8 }, - .bts_interrupt_threshold = { 24, 8 }, - .sizeof_bts = 3 * 8, - .from_ip = { 0, 8 }, - .to_ip = { 8, 8 }, - .info_type = { 8, 1 }, - .info_data = { 16, 8 }, - .debugctl_mask = (1<<6)|(1<<7)|(1<<9) +static const struct ds_configuration ds_cfg_64 = { + .sizeof_ds = 8 * 12, + .sizeof_field = 8, + .sizeof_rec[ds_bts] = 8 * 3, + .sizeof_rec[ds_pebs] = 8 * 10 }; static inline void @@ -429,14 +820,13 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) switch (c->x86) { case 0x6: switch (c->x86_model) { -#ifdef __i386__ case 0xD: case 0xE: /* Pentium M */ - ds_configure(&ds_cfg_pentium_m); + ds_configure(&ds_cfg_var); break; -#endif /* _i386_ */ case 0xF: /* Core2 */ - ds_configure(&ds_cfg_core2); + case 0x1C: /* Atom */ + ds_configure(&ds_cfg_64); break; default: /* sorry, don't know about them */ @@ -445,13 +835,11 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) break; case 0xF: switch (c->x86_model) { -#ifdef __i386__ case 0x0: case 0x1: case 0x2: /* Netburst */ - ds_configure(&ds_cfg_netburst); + ds_configure(&ds_cfg_var); break; -#endif /* _i386_ */ default: /* sorry, don't know about them */ break; @@ -462,3 +850,14 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) break; } } + +void ds_free(struct ds_context *context) +{ + /* This is called when the task owning the parameter context + * is dying. There should not be any user of that context left + * to disturb us, anymore. */ + unsigned long leftovers = context->count; + while (leftovers--) + ds_put_context(context); +} +#endif /* CONFIG_X86_DS */ diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index f8476df..5cec8a7 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -316,6 +316,14 @@ void exit_thread(void) tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; put_cpu(); } +#ifdef CONFIG_X86_DS + /* Free any DS contexts that have not been properly released. */ + if (unlikely(current->thread.ds_ctx)) { + /* we clear debugctl to make sure DS is not used. */ + update_debugctlmsr(0); + ds_free(current->thread.ds_ctx); + } +#endif /* CONFIG_X86_DS */ } void flush_thread(void) @@ -482,18 +490,27 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, { struct thread_struct *prev, *next; unsigned long debugctl; + unsigned long ds_prev = 0, ds_next = 0; prev = &prev_p->thread; next = &next_p->thread; debugctl = prev->debugctlmsr; - if (next->ds_area_msr != prev->ds_area_msr) { + +#ifdef CONFIG_X86_DS + if (prev->ds_ctx) + ds_prev = (unsigned long)prev->ds_ctx->ds; + if (next->ds_ctx) + ds_next = (unsigned long)next->ds_ctx->ds; + + if (ds_next != ds_prev) { /* we clear debugctl to make sure DS * is not in use when we change it */ debugctl = 0; update_debugctlmsr(0); - wrmsr(MSR_IA32_DS_AREA, next->ds_area_msr, 0); + wrmsr(MSR_IA32_DS_AREA, ds_next, 0); } +#endif /* CONFIG_X86_DS */ if (next->debugctlmsr != debugctl) update_debugctlmsr(next->debugctlmsr); @@ -517,13 +534,13 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, hard_enable_TSC(); } -#ifdef X86_BTS +#ifdef CONFIG_X86_PTRACE_BTS if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); -#endif +#endif /* CONFIG_X86_PTRACE_BTS */ if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index e2319f3..ad21349 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -267,6 +267,14 @@ void exit_thread(void) t->io_bitmap_max = 0; put_cpu(); } +#ifdef CONFIG_X86_DS + /* Free any DS contexts that have not been properly released. */ + if (unlikely(t->ds_ctx)) { + /* we clear debugctl to make sure DS is not used. */ + update_debugctlmsr(0); + ds_free(t->ds_ctx); + } +#endif /* CONFIG_X86_DS */ } void flush_thread(void) @@ -492,18 +500,27 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, { struct thread_struct *prev, *next; unsigned long debugctl; + unsigned long ds_prev = 0, ds_next = 0; prev = &prev_p->thread, next = &next_p->thread; debugctl = prev->debugctlmsr; - if (next->ds_area_msr != prev->ds_area_msr) { + +#ifdef CONFIG_X86_DS + if (prev->ds_ctx) + ds_prev = (unsigned long)prev->ds_ctx->ds; + if (next->ds_ctx) + ds_next = (unsigned long)next->ds_ctx->ds; + + if (ds_next != ds_prev) { /* we clear debugctl to make sure DS * is not in use when we change it */ debugctl = 0; update_debugctlmsr(0); - wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr); + wrmsrl(MSR_IA32_DS_AREA, ds_next); } +#endif /* CONFIG_X86_DS */ if (next->debugctlmsr != debugctl) update_debugctlmsr(next->debugctlmsr); @@ -541,13 +558,13 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); } -#ifdef X86_BTS +#ifdef CONFIG_X86_PTRACE_BTS if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); -#endif +#endif /* CONFIG_X86_PTRACE_BTS */ } /* diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index fb03ef3..b7ff783 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -554,45 +554,115 @@ static int ptrace_set_debugreg(struct task_struct *child, return 0; } -#ifdef X86_BTS +#ifdef CONFIG_X86_PTRACE_BTS +/* + * The configuration for a particular BTS hardware implementation. + */ +struct bts_configuration { + /* the size of a BTS record in bytes; at most BTS_MAX_RECORD_SIZE */ + unsigned char sizeof_bts; + /* the size of a field in the BTS record in bytes */ + unsigned char sizeof_field; + /* a bitmask to enable/disable BTS in DEBUGCTL MSR */ + unsigned long debugctl_mask; +}; +static struct bts_configuration bts_cfg; + +#define BTS_MAX_RECORD_SIZE (8 * 3) + + +/* + * Branch Trace Store (BTS) uses the following format. Different + * architectures vary in the size of those fields. + * - source linear address + * - destination linear address + * - flags + * + * Later architectures use 64bit pointers throughout, whereas earlier + * architectures use 32bit pointers in 32bit mode. + * + * We compute the base address for the first 8 fields based on: + * - the field size stored in the DS configuration + * - the relative field position + * + * In order to store additional information in the BTS buffer, we use + * a special source address to indicate that the record requires + * special interpretation. + * + * Netburst indicated via a bit in the flags field whether the branch + * was predicted; this is ignored. + */ -static int ptrace_bts_get_size(struct task_struct *child) +enum bts_field { + bts_from = 0, + bts_to, + bts_flags, + + bts_escape = (unsigned long)-1, + bts_qual = bts_to, + bts_jiffies = bts_flags +}; + +static inline unsigned long bts_get(const char *base, enum bts_field field) { - if (!child->thread.ds_area_msr) - return -ENXIO; + base += (bts_cfg.sizeof_field * field); + return *(unsigned long *)base; +} - return ds_get_bts_index((void *)child->thread.ds_area_msr); +static inline void bts_set(char *base, enum bts_field field, unsigned long val) +{ + base += (bts_cfg.sizeof_field * field);; + (*(unsigned long *)base) = val; } -static int ptrace_bts_read_record(struct task_struct *child, - long index, +/* + * Translate a BTS record from the raw format into the bts_struct format + * + * out (out): bts_struct interpretation + * raw: raw BTS record + */ +static void ptrace_bts_translate_record(struct bts_struct *out, const void *raw) +{ + memset(out, 0, sizeof(*out)); + if (bts_get(raw, bts_from) == bts_escape) { + out->qualifier = bts_get(raw, bts_qual); + out->variant.jiffies = bts_get(raw, bts_jiffies); + } else { + out->qualifier = BTS_BRANCH; + out->variant.lbr.from_ip = bts_get(raw, bts_from); + out->variant.lbr.to_ip = bts_get(raw, bts_to); + } +} + +static int ptrace_bts_read_record(struct task_struct *child, size_t index, struct bts_struct __user *out) { struct bts_struct ret; - int retval; - int bts_end; - int bts_index; - - if (!child->thread.ds_area_msr) - return -ENXIO; + const void *bts_record; + size_t bts_index, bts_end; + int error; - if (index < 0) - return -EINVAL; + error = ds_get_bts_end(child, &bts_end); + if (error < 0) + return error; - bts_end = ds_get_bts_end((void *)child->thread.ds_area_msr); if (bts_end <= index) return -EINVAL; + error = ds_get_bts_index(child, &bts_index); + if (error < 0) + return error; + /* translate the ptrace bts index into the ds bts index */ - bts_index = ds_get_bts_index((void *)child->thread.ds_area_msr); - bts_index -= (index + 1); - if (bts_index < 0) - bts_index += bts_end; + bts_index += bts_end - (index + 1); + if (bts_end <= bts_index) + bts_index -= bts_end; - retval = ds_read_bts((void *)child->thread.ds_area_msr, - bts_index, &ret); - if (retval < 0) - return retval; + error = ds_access_bts(child, bts_index, &bts_record); + if (error < 0) + return error; + + ptrace_bts_translate_record(&ret, bts_record); if (copy_to_user(out, &ret, sizeof(ret))) return -EFAULT; @@ -600,101 +670,106 @@ static int ptrace_bts_read_record(struct task_struct *child, return sizeof(ret); } -static int ptrace_bts_clear(struct task_struct *child) -{ - if (!child->thread.ds_area_msr) - return -ENXIO; - - return ds_clear((void *)child->thread.ds_area_msr); -} - static int ptrace_bts_drain(struct task_struct *child, long size, struct bts_struct __user *out) { - int end, i; - void *ds = (void *)child->thread.ds_area_msr; - - if (!ds) - return -ENXIO; + struct bts_struct ret; + const unsigned char *raw; + size_t end, i; + int error; - end = ds_get_bts_index(ds); - if (end <= 0) - return end; + error = ds_get_bts_index(child, &end); + if (error < 0) + return error; if (size < (end * sizeof(struct bts_struct))) return -EIO; - for (i = 0; i < end; i++, out++) { - struct bts_struct ret; - int retval; + error = ds_access_bts(child, 0, (const void **)&raw); + if (error < 0) + return error; - retval = ds_read_bts(ds, i, &ret); - if (retval < 0) - return retval; + for (i = 0; i < end; i++, out++, raw += bts_cfg.sizeof_bts) { + ptrace_bts_translate_record(&ret, raw); if (copy_to_user(out, &ret, sizeof(ret))) return -EFAULT; } - ds_clear(ds); + error = ds_clear_bts(child); + if (error < 0) + return error; return end; } +static void ptrace_bts_ovfl(struct task_struct *child) +{ + send_sig(child->thread.bts_ovfl_signal, child, 0); +} + static int ptrace_bts_config(struct task_struct *child, long cfg_size, const struct ptrace_bts_config __user *ucfg) { struct ptrace_bts_config cfg; - int bts_size, ret = 0; - void *ds; + int error = 0; + + error = -EOPNOTSUPP; + if (!bts_cfg.sizeof_bts) + goto errout; + error = -EIO; if (cfg_size < sizeof(cfg)) - return -EIO; + goto errout; + error = -EFAULT; if (copy_from_user(&cfg, ucfg, sizeof(cfg))) - return -EFAULT; + goto errout; - if ((int)cfg.size < 0) - return -EINVAL; + error = -EINVAL; + if ((cfg.flags & PTRACE_BTS_O_SIGNAL) && + !(cfg.flags & PTRACE_BTS_O_ALLOC)) + goto errout; - bts_size = 0; - ds = (void *)child->thread.ds_area_msr; - if (ds) { - bts_size = ds_get_bts_size(ds); - if (bts_size < 0) - return bts_size; - } - cfg.size = PAGE_ALIGN(cfg.size); + if (cfg.flags & PTRACE_BTS_O_ALLOC) { + ds_ovfl_callback_t ovfl = 0; + unsigned int sig = 0; - if (bts_size != cfg.size) { - ret = ptrace_bts_realloc(child, cfg.size, - cfg.flags & PTRACE_BTS_O_CUT_SIZE); - if (ret < 0) + /* we ignore the error in case we were not tracing child */ + (void)ds_release_bts(child); + + if (cfg.flags & PTRACE_BTS_O_SIGNAL) { + if (!cfg.signal) + goto errout; + + sig = cfg.signal; + ovfl = ptrace_bts_ovfl; + } + + error = ds_request_bts(child, /* base = */ 0, cfg.size, ovfl); + if (error < 0) goto errout; - ds = (void *)child->thread.ds_area_msr; + child->thread.bts_ovfl_signal = sig; } - if (cfg.flags & PTRACE_BTS_O_SIGNAL) - ret = ds_set_overflow(ds, DS_O_SIGNAL); - else - ret = ds_set_overflow(ds, DS_O_WRAP); - if (ret < 0) + error = -EINVAL; + if (!child->thread.ds_ctx && cfg.flags) goto errout; if (cfg.flags & PTRACE_BTS_O_TRACE) - child->thread.debugctlmsr |= ds_debugctl_mask(); + child->thread.debugctlmsr |= bts_cfg.debugctl_mask; else - child->thread.debugctlmsr &= ~ds_debugctl_mask(); + child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask; if (cfg.flags & PTRACE_BTS_O_SCHED) set_tsk_thread_flag(child, TIF_BTS_TRACE_TS); else clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); - ret = sizeof(cfg); + error = sizeof(cfg); out: if (child->thread.debugctlmsr) @@ -702,10 +777,10 @@ out: else clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); - return ret; + return error; errout: - child->thread.debugctlmsr &= ~ds_debugctl_mask(); + child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask; clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); goto out; } @@ -714,29 +789,40 @@ static int ptrace_bts_status(struct task_struct *child, long cfg_size, struct ptrace_bts_config __user *ucfg) { - void *ds = (void *)child->thread.ds_area_msr; struct ptrace_bts_config cfg; + size_t end; + const void *base, *max; + int error; if (cfg_size < sizeof(cfg)) return -EIO; - memset(&cfg, 0, sizeof(cfg)); + error = ds_get_bts_end(child, &end); + if (error < 0) + return error; - if (ds) { - cfg.size = ds_get_bts_size(ds); + error = ds_access_bts(child, /* index = */ 0, &base); + if (error < 0) + return error; - if (ds_get_overflow(ds) == DS_O_SIGNAL) - cfg.flags |= PTRACE_BTS_O_SIGNAL; + error = ds_access_bts(child, /* index = */ end, &max); + if (error < 0) + return error; - if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) && - child->thread.debugctlmsr & ds_debugctl_mask()) - cfg.flags |= PTRACE_BTS_O_TRACE; + memset(&cfg, 0, sizeof(cfg)); + cfg.size = (max - base); + cfg.signal = child->thread.bts_ovfl_signal; + cfg.bts_size = sizeof(struct bts_struct); - if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS)) - cfg.flags |= PTRACE_BTS_O_SCHED; - } + if (cfg.signal) + cfg.flags |= PTRACE_BTS_O_SIGNAL; - cfg.bts_size = sizeof(struct bts_struct); + if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) && + child->thread.debugctlmsr & bts_cfg.debugctl_mask) + cfg.flags |= PTRACE_BTS_O_TRACE; + + if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS)) + cfg.flags |= PTRACE_BTS_O_SCHED; if (copy_to_user(ucfg, &cfg, sizeof(cfg))) return -EFAULT; @@ -744,89 +830,38 @@ static int ptrace_bts_status(struct task_struct *child, return sizeof(cfg); } - static int ptrace_bts_write_record(struct task_struct *child, const struct bts_struct *in) { - int retval; + unsigned char bts_record[BTS_MAX_RECORD_SIZE]; - if (!child->thread.ds_area_msr) - return -ENXIO; + BUG_ON(BTS_MAX_RECORD_SIZE < bts_cfg.sizeof_bts); - retval = ds_write_bts((void *)child->thread.ds_area_msr, in); - if (retval) - return retval; + memset(bts_record, 0, bts_cfg.sizeof_bts); + switch (in->qualifier) { + case BTS_INVALID: + break; - return sizeof(*in); -} + case BTS_BRANCH: + bts_set(bts_record, bts_from, in->variant.lbr.from_ip); + bts_set(bts_record, bts_to, in->variant.lbr.to_ip); + break; -static int ptrace_bts_realloc(struct task_struct *child, - int size, int reduce_size) -{ - unsigned long rlim, vm; - int ret, old_size; + case BTS_TASK_ARRIVES: + case BTS_TASK_DEPARTS: + bts_set(bts_record, bts_from, bts_escape); + bts_set(bts_record, bts_qual, in->qualifier); + bts_set(bts_record, bts_jiffies, in->variant.jiffies); + break; - if (size < 0) + default: return -EINVAL; - - old_size = ds_get_bts_size((void *)child->thread.ds_area_msr); - if (old_size < 0) - return old_size; - - ret = ds_free((void **)&child->thread.ds_area_msr); - if (ret < 0) - goto out; - - size >>= PAGE_SHIFT; - old_size >>= PAGE_SHIFT; - - current->mm->total_vm -= old_size; - current->mm->locked_vm -= old_size; - - if (size == 0) - goto out; - - rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT; - vm = current->mm->total_vm + size; - if (rlim < vm) { - ret = -ENOMEM; - - if (!reduce_size) - goto out; - - size = rlim - current->mm->total_vm; - if (size <= 0) - goto out; } - rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; - vm = current->mm->locked_vm + size; - if (rlim < vm) { - ret = -ENOMEM; - - if (!reduce_size) - goto out; - - size = rlim - current->mm->locked_vm; - if (size <= 0) - goto out; - } - - ret = ds_allocate((void **)&child->thread.ds_area_msr, - size << PAGE_SHIFT); - if (ret < 0) - goto out; - - current->mm->total_vm += size; - current->mm->locked_vm += size; - -out: - if (child->thread.ds_area_msr) - set_tsk_thread_flag(child, TIF_DS_AREA_MSR); - else - clear_tsk_thread_flag(child, TIF_DS_AREA_MSR); - - return ret; + /* The writing task will be the switched-to task on a context + * switch. It needs to write into the switched-from task's BTS + * buffer. */ + return ds_unchecked_write_bts(child, bts_record, bts_cfg.sizeof_bts); } void ptrace_bts_take_timestamp(struct task_struct *tsk, @@ -839,7 +874,66 @@ void ptrace_bts_take_timestamp(struct task_struct *tsk, ptrace_bts_write_record(tsk, &rec); } -#endif /* X86_BTS */ + +static const struct bts_configuration bts_cfg_netburst = { + .sizeof_bts = sizeof(long) * 3, + .sizeof_field = sizeof(long), + .debugctl_mask = (1<<2)|(1<<3)|(1<<5) +}; + +static const struct bts_configuration bts_cfg_pentium_m = { + .sizeof_bts = sizeof(long) * 3, + .sizeof_field = sizeof(long), + .debugctl_mask = (1<<6)|(1<<7) +}; + +static const struct bts_configuration bts_cfg_core2 = { + .sizeof_bts = 8 * 3, + .sizeof_field = 8, + .debugctl_mask = (1<<6)|(1<<7)|(1<<9) +}; + +static inline void bts_configure(const struct bts_configuration *cfg) +{ + bts_cfg = *cfg; +} + +void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *c) +{ + switch (c->x86) { + case 0x6: + switch (c->x86_model) { + case 0xD: + case 0xE: /* Pentium M */ + bts_configure(&bts_cfg_pentium_m); + break; + case 0xF: /* Core2 */ + case 0x1C: /* Atom */ + bts_configure(&bts_cfg_core2); + break; + default: + /* sorry, don't know about them */ + break; + } + break; + case 0xF: + switch (c->x86_model) { + case 0x0: + case 0x1: + case 0x2: /* Netburst */ + bts_configure(&bts_cfg_netburst); + break; + default: + /* sorry, don't know about them */ + break; + } + break; + default: + /* sorry, don't know about them */ + break; + } +} +#endif /* CONFIG_X86_PTRACE_BTS */ /* * Called by kernel/ptrace.c when detaching.. @@ -852,15 +946,15 @@ void ptrace_disable(struct task_struct *child) #ifdef TIF_SYSCALL_EMU clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); #endif - if (child->thread.ds_area_msr) { -#ifdef X86_BTS - ptrace_bts_realloc(child, 0, 0); -#endif - child->thread.debugctlmsr &= ~ds_debugctl_mask(); - if (!child->thread.debugctlmsr) - clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); - clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); - } +#ifdef CONFIG_X86_PTRACE_BTS + (void)ds_release_bts(child); + + child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask; + if (!child->thread.debugctlmsr) + clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); + + clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); +#endif /* CONFIG_X86_PTRACE_BTS */ } #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION @@ -980,7 +1074,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) /* * These bits need more cooking - not enabled yet: */ -#ifdef X86_BTS +#ifdef CONFIG_X86_PTRACE_BTS case PTRACE_BTS_CONFIG: ret = ptrace_bts_config (child, data, (struct ptrace_bts_config __user *)addr); @@ -992,7 +1086,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) break; case PTRACE_BTS_SIZE: - ret = ptrace_bts_get_size(child); + ret = ds_get_bts_index(child, /* pos = */ 0); break; case PTRACE_BTS_GET: @@ -1001,14 +1095,14 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) break; case PTRACE_BTS_CLEAR: - ret = ptrace_bts_clear(child); + ret = ds_clear_bts(child); break; case PTRACE_BTS_DRAIN: ret = ptrace_bts_drain (child, data, (struct bts_struct __user *) addr); break; -#endif +#endif /* CONFIG_X86_PTRACE_BTS */ default: ret = ptrace_request(child, request, addr, data); diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index f2fc8fe..f7aebe1 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -920,11 +920,12 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_BTS); if (!(l1 & (1<<12))) set_cpu_cap(c, X86_FEATURE_PEBS); + ds_init_intel(c); } if (cpu_has_bts) - ds_init_intel(c); + ptrace_bts_init_intel(c); n = c->extended_cpuid_level; if (n >= 0x80000008) { diff --git a/include/asm-x86/ds.h b/include/asm-x86/ds.h index 7881368..72c5a19 100644 --- a/include/asm-x86/ds.h +++ b/include/asm-x86/ds.h @@ -2,71 +2,237 @@ * Debug Store (DS) support * * This provides a low-level interface to the hardware's Debug Store - * feature that is used for last branch recording (LBR) and + * feature that is used for branch trace store (BTS) and * precise-event based sampling (PEBS). * - * Different architectures use a different DS layout/pointer size. - * The below functions therefore work on a void*. + * It manages: + * - per-thread and per-cpu allocation of BTS and PEBS + * - buffer memory allocation (optional) + * - buffer overflow handling + * - buffer access * + * It assumes: + * - get_task_struct on all parameter tasks + * - current is allowed to trace parameter tasks * - * Since there is no user for PEBS, yet, only LBR (or branch - * trace store, BTS) is supported. * - * - * Copyright (C) 2007 Intel Corporation. - * Markus Metzger , Dec 2007 + * Copyright (C) 2007-2008 Intel Corporation. + * Markus Metzger , 2007-2008 */ #ifndef _ASM_X86_DS_H #define _ASM_X86_DS_H +#ifdef CONFIG_X86_DS + #include #include -struct cpuinfo_x86; +struct task_struct; -/* a branch trace record entry +/* + * Request BTS or PEBS + * + * Due to alignement constraints, the actual buffer may be slightly + * smaller than the requested or provided buffer. * - * In order to unify the interface between various processor versions, - * we use the below data structure for all processors. + * Returns 0 on success; -Eerrno otherwise + * + * task: the task to request recording for; + * NULL for per-cpu recording on the current cpu + * base: the base pointer for the (non-pageable) buffer; + * NULL if buffer allocation requested + * size: the size of the requested or provided buffer + * ovfl: pointer to a function to be called on buffer overflow; + * NULL if cyclic buffer requested */ -enum bts_qualifier { - BTS_INVALID = 0, - BTS_BRANCH, - BTS_TASK_ARRIVES, - BTS_TASK_DEPARTS -}; +typedef void (*ds_ovfl_callback_t)(struct task_struct *); +extern int ds_request_bts(struct task_struct *task, void *base, size_t size, + ds_ovfl_callback_t ovfl); +extern int ds_request_pebs(struct task_struct *task, void *base, size_t size, + ds_ovfl_callback_t ovfl); + +/* + * Release BTS or PEBS resources + * + * Frees buffers allocated on ds_request. + * + * Returns 0 on success; -Eerrno otherwise + * + * task: the task to release resources for; + * NULL to release resources for the current cpu + */ +extern int ds_release_bts(struct task_struct *task); +extern int ds_release_pebs(struct task_struct *task); + +/* + * Return the (array) index of the write pointer. + * (assuming an array of BTS/PEBS records) + * + * Returns -Eerrno on error + * + * task: the task to access; + * NULL to access the current cpu + * pos (out): if not NULL, will hold the result + */ +extern int ds_get_bts_index(struct task_struct *task, size_t *pos); +extern int ds_get_pebs_index(struct task_struct *task, size_t *pos); + +/* + * Return the (array) index one record beyond the end of the array. + * (assuming an array of BTS/PEBS records) + * + * Returns -Eerrno on error + * + * task: the task to access; + * NULL to access the current cpu + * pos (out): if not NULL, will hold the result + */ +extern int ds_get_bts_end(struct task_struct *task, size_t *pos); +extern int ds_get_pebs_end(struct task_struct *task, size_t *pos); + +/* + * Provide a pointer to the BTS/PEBS record at parameter index. + * (assuming an array of BTS/PEBS records) + * + * The pointer points directly into the buffer. The user is + * responsible for copying the record. + * + * Returns the size of a single record on success; -Eerrno on error + * + * task: the task to access; + * NULL to access the current cpu + * index: the index of the requested record + * record (out): pointer to the requested record + */ +extern int ds_access_bts(struct task_struct *task, + size_t index, const void **record); +extern int ds_access_pebs(struct task_struct *task, + size_t index, const void **record); + +/* + * Write one or more BTS/PEBS records at the write pointer index and + * advance the write pointer. + * + * If size is not a multiple of the record size, trailing bytes are + * zeroed out. + * + * May result in one or more overflow notifications. + * + * If called during overflow handling, that is, with index >= + * interrupt threshold, the write will wrap around. + * + * An overflow notification is given if and when the interrupt + * threshold is reached during or after the write. + * + * Returns the number of bytes written or -Eerrno. + * + * task: the task to access; + * NULL to access the current cpu + * buffer: the buffer to write + * size: the size of the buffer + */ +extern int ds_write_bts(struct task_struct *task, + const void *buffer, size_t size); +extern int ds_write_pebs(struct task_struct *task, + const void *buffer, size_t size); + +/* + * Same as ds_write_bts/pebs, but omit ownership checks. + * + * This is needed to have some other task than the owner of the + * BTS/PEBS buffer or the parameter task itself write into the + * respective buffer. + */ +extern int ds_unchecked_write_bts(struct task_struct *task, + const void *buffer, size_t size); +extern int ds_unchecked_write_pebs(struct task_struct *task, + const void *buffer, size_t size); + +/* + * Reset the write pointer of the BTS/PEBS buffer. + * + * Returns 0 on success; -Eerrno on error + * + * task: the task to access; + * NULL to access the current cpu + */ +extern int ds_reset_bts(struct task_struct *task); +extern int ds_reset_pebs(struct task_struct *task); + +/* + * Clear the BTS/PEBS buffer and reset the write pointer. + * The entire buffer will be zeroed out. + * + * Returns 0 on success; -Eerrno on error + * + * task: the task to access; + * NULL to access the current cpu + */ +extern int ds_clear_bts(struct task_struct *task); +extern int ds_clear_pebs(struct task_struct *task); + +/* + * Provide the PEBS counter reset value. + * + * Returns 0 on success; -Eerrno on error + * + * task: the task to access; + * NULL to access the current cpu + * value (out): the counter reset value + */ +extern int ds_get_pebs_reset(struct task_struct *task, u64 *value); + +/* + * Set the PEBS counter reset value. + * + * Returns 0 on success; -Eerrno on error + * + * task: the task to access; + * NULL to access the current cpu + * value: the new counter reset value + */ +extern int ds_set_pebs_reset(struct task_struct *task, u64 value); + +/* + * Initialization + */ +struct cpuinfo_x86; +extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *); + + -struct bts_struct { - u64 qualifier; - union { - /* BTS_BRANCH */ - struct { - u64 from_ip; - u64 to_ip; - } lbr; - /* BTS_TASK_ARRIVES or - BTS_TASK_DEPARTS */ - u64 jiffies; - } variant; +/* + * The DS context - part of struct thread_struct. + */ +struct ds_context { + /* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */ + unsigned char *ds; + /* the owner of the BTS and PEBS configuration, respectively */ + struct task_struct *owner[2]; + /* buffer overflow notification function for BTS and PEBS */ + ds_ovfl_callback_t callback[2]; + /* the original buffer address */ + void *buffer[2]; + /* the number of allocated pages for on-request allocated buffers */ + unsigned int pages[2]; + /* use count */ + unsigned long count; + /* a pointer to the context location inside the thread_struct + * or the per_cpu context array */ + struct ds_context **this; + /* a pointer to the task owning this context, or NULL, if the + * context is owned by a cpu */ + struct task_struct *task; }; -/* Overflow handling mechanisms */ -#define DS_O_SIGNAL 1 /* send overflow signal */ -#define DS_O_WRAP 2 /* wrap around */ - -extern int ds_allocate(void **, size_t); -extern int ds_free(void **); -extern int ds_get_bts_size(void *); -extern int ds_get_bts_end(void *); -extern int ds_get_bts_index(void *); -extern int ds_set_overflow(void *, int); -extern int ds_get_overflow(void *); -extern int ds_clear(void *); -extern int ds_read_bts(void *, int, struct bts_struct *); -extern int ds_write_bts(void *, const struct bts_struct *); -extern unsigned long ds_debugctl_mask(void); -extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *c); +/* called by exit_thread() to free leftover contexts */ +extern void ds_free(struct ds_context *context); + +#else /* CONFIG_X86_DS */ + +#define ds_init_intel(config) do {} while (0) +#endif /* CONFIG_X86_DS */ #endif /* _ASM_X86_DS_H */ diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h index 5591052..beaccb7 100644 --- a/include/asm-x86/processor.h +++ b/include/asm-x86/processor.h @@ -20,6 +20,7 @@ struct mm_struct; #include #include #include +#include #include #include @@ -415,9 +416,14 @@ struct thread_struct { unsigned io_bitmap_max; /* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. */ unsigned long debugctlmsr; -/* Debug Store - if not 0 points to a DS Save Area configuration; - * goes into MSR_IA32_DS_AREA */ - unsigned long ds_area_msr; +#ifdef CONFIG_X86_DS +/* Debug Store context; see include/asm-x86/ds.h; goes into MSR_IA32_DS_AREA */ + struct ds_context *ds_ctx; +#endif /* CONFIG_X86_DS */ +#ifdef CONFIG_X86_PTRACE_BTS +/* the signal to send on a bts buffer overflow */ + unsigned int bts_ovfl_signal; +#endif /* CONFIG_X86_PTRACE_BTS */ }; static inline unsigned long native_get_debugreg(int regno) diff --git a/include/asm-x86/ptrace-abi.h b/include/asm-x86/ptrace-abi.h index f224eb3..9bcaa75 100644 --- a/include/asm-x86/ptrace-abi.h +++ b/include/asm-x86/ptrace-abi.h @@ -80,8 +80,9 @@ #define PTRACE_SINGLEBLOCK 33 /* resume execution until next branch */ -#ifndef __ASSEMBLY__ +#ifdef CONFIG_X86_PTRACE_BTS +#ifndef __ASSEMBLY__ #include /* configuration/status structure used in PTRACE_BTS_CONFIG and @@ -97,20 +98,20 @@ struct ptrace_bts_config { /* actual size of bts_struct in bytes */ __u32 bts_size; }; -#endif +#endif /* __ASSEMBLY__ */ #define PTRACE_BTS_O_TRACE 0x1 /* branch trace */ #define PTRACE_BTS_O_SCHED 0x2 /* scheduling events w/ jiffies */ #define PTRACE_BTS_O_SIGNAL 0x4 /* send SIG on buffer overflow instead of wrapping around */ -#define PTRACE_BTS_O_CUT_SIZE 0x8 /* cut requested size to max available - instead of failing */ +#define PTRACE_BTS_O_ALLOC 0x8 /* (re)allocate buffer */ #define PTRACE_BTS_CONFIG 40 /* Configure branch trace recording. ADDR points to a struct ptrace_bts_config. DATA gives the size of that buffer. - A new buffer is allocated, iff the size changes. + A new buffer is allocated, if requested in the flags. + An overflow signal may only be requested for new buffers. Returns the number of bytes read. */ #define PTRACE_BTS_STATUS 41 @@ -119,7 +120,7 @@ struct ptrace_bts_config { Returns the number of bytes written. */ #define PTRACE_BTS_SIZE 42 -/* Return the number of available BTS records. +/* Return the number of available BTS records for draining. DATA and ADDR are ignored. */ #define PTRACE_BTS_GET 43 @@ -139,5 +140,6 @@ struct ptrace_bts_config { BTS records are read from oldest to newest. Returns number of BTS records drained. */ +#endif /* CONFIG_X86_PTRACE_BTS */ #endif diff --git a/include/asm-x86/ptrace.h b/include/asm-x86/ptrace.h index 9f922b0..6303701 100644 --- a/include/asm-x86/ptrace.h +++ b/include/asm-x86/ptrace.h @@ -125,14 +125,48 @@ struct pt_regs { #endif /* __KERNEL__ */ #endif /* !__i386__ */ + +#ifdef CONFIG_X86_PTRACE_BTS +/* a branch trace record entry + * + * In order to unify the interface between various processor versions, + * we use the below data structure for all processors. + */ +enum bts_qualifier { + BTS_INVALID = 0, + BTS_BRANCH, + BTS_TASK_ARRIVES, + BTS_TASK_DEPARTS +}; + +struct bts_struct { + __u64 qualifier; + union { + /* BTS_BRANCH */ + struct { + __u64 from_ip; + __u64 to_ip; + } lbr; + /* BTS_TASK_ARRIVES or + BTS_TASK_DEPARTS */ + __u64 jiffies; + } variant; +}; +#endif /* CONFIG_X86_PTRACE_BTS */ + #ifdef __KERNEL__ -/* the DS BTS struct is used for ptrace as well */ -#include +#include +struct cpuinfo_x86; struct task_struct; +#ifdef CONFIG_X86_PTRACE_BTS +extern void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *); extern void ptrace_bts_take_timestamp(struct task_struct *, enum bts_qualifier); +#else +#define ptrace_bts_init_intel(config) do {} while (0) +#endif /* CONFIG_X86_PTRACE_BTS */ extern unsigned long profile_pc(struct pt_regs *regs); -- cgit v0.10.2 From 970e725098a6da5a9c1f8128102c812e31a0444c Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 16 Apr 2008 16:40:17 -0700 Subject: x86, ptrace: PEBS support, warning fix arch/x86/kernel/process_32.c:566: warning: unused variable 'ds_next' arch/x86/kernel/process_32.c:566: warning: unused variable 'ds_prev' Cc: Markus Metzger Cc: Andi Kleen Cc: "H. Peter Anvin" Cc: "Siddha, Suresh B" Cc: Roland McGrath Cc: Michael Kerrisk Cc: Cc: stephane eranian Cc: Jason Wessel Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 5cec8a7..496ea31 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -484,20 +484,13 @@ int set_tsc_mode(unsigned int val) return 0; } -static noinline void -__switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, - struct tss_struct *tss) +#ifdef CONFIG_X86_DS +static int update_debugctl(struct thread_struct *prev, + struct thread_struct *next, unsigned long debugctl) { - struct thread_struct *prev, *next; - unsigned long debugctl; - unsigned long ds_prev = 0, ds_next = 0; + unsigned long ds_prev = 0; + unsigned long ds_next = 0; - prev = &prev_p->thread; - next = &next_p->thread; - - debugctl = prev->debugctlmsr; - -#ifdef CONFIG_X86_DS if (prev->ds_ctx) ds_prev = (unsigned long)prev->ds_ctx->ds; if (next->ds_ctx) @@ -510,8 +503,28 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, update_debugctlmsr(0); wrmsr(MSR_IA32_DS_AREA, ds_next, 0); } + return debugctl; +} +#else +static int update_debugctl(struct thread_struct *prev, + struct thread_struct *next, unsigned long debugctl) +{ + return debugctl; +} #endif /* CONFIG_X86_DS */ +static noinline void +__switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, + struct tss_struct *tss) +{ + struct thread_struct *prev, *next; + unsigned long debugctl; + + prev = &prev_p->thread; + next = &next_p->thread; + + debugctl = update_debugctl(prev, next, prev->debugctlmsr); + if (next->debugctlmsr != debugctl) update_debugctlmsr(next->debugctlmsr); -- cgit v0.10.2 From 573da4224e8c3800e613d715e909c3179a7e3cb2 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 28 Apr 2008 23:15:04 +0400 Subject: x86: DS cleanup - dont treat 0 as NULL Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 5b32b6d..24a323c 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -238,12 +238,12 @@ static inline struct ds_context *ds_alloc_context(struct task_struct *task) context = kzalloc(sizeof(*context), GFP_KERNEL); if (!context) - return 0; + return NULL; context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL); if (!context->ds) { kfree(context); - return 0; + return NULL; } *p_context = context; @@ -279,7 +279,7 @@ static inline void ds_put_context(struct ds_context *context) if (--context->count) goto out; - *(context->this) = 0; + *(context->this) = NULL; if (context->task) clear_tsk_thread_flag(context->task, TIF_DS_AREA_MSR); @@ -341,16 +341,16 @@ static inline void *ds_allocate_buffer(size_t size, unsigned int *pages) rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT; vm = current->mm->total_vm + pgsz; if (rlim < vm) - return 0; + return NULL; rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; vm = current->mm->locked_vm + pgsz; if (rlim < vm) - return 0; + return NULL; buffer = kzalloc(size, GFP_KERNEL); if (!buffer) - return 0; + return NULL; current->mm->total_vm += pgsz; current->mm->locked_vm += pgsz; @@ -395,7 +395,7 @@ static int ds_request(struct task_struct *task, void *base, size_t size, if (context->owner[qual] == current) goto out_unlock; error = -EPERM; - if (context->owner[qual] != 0) + if (context->owner[qual] != NULL) goto out_unlock; context->owner[qual] = current; @@ -445,7 +445,7 @@ static int ds_request(struct task_struct *task, void *base, size_t size, return error; out_release: - context->owner[qual] = 0; + context->owner[qual] = NULL; ds_put_context(context); return error; @@ -825,7 +825,7 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) ds_configure(&ds_cfg_var); break; case 0xF: /* Core2 */ - case 0x1C: /* Atom */ + case 0x1C: /* Atom */ ds_configure(&ds_cfg_64); break; default: -- cgit v0.10.2 From 34b2cd5b688b012975fcfc3b3970fc3508fa82c4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 17 May 2008 08:30:07 +0200 Subject: x86: PEBS cleanup Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index ad21349..4a93c98 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -500,7 +500,6 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, { struct thread_struct *prev, *next; unsigned long debugctl; - unsigned long ds_prev = 0, ds_next = 0; prev = &prev_p->thread, next = &next_p->thread; @@ -508,17 +507,23 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, debugctl = prev->debugctlmsr; #ifdef CONFIG_X86_DS - if (prev->ds_ctx) - ds_prev = (unsigned long)prev->ds_ctx->ds; - if (next->ds_ctx) - ds_next = (unsigned long)next->ds_ctx->ds; - - if (ds_next != ds_prev) { - /* we clear debugctl to make sure DS - * is not in use when we change it */ - debugctl = 0; - update_debugctlmsr(0); - wrmsrl(MSR_IA32_DS_AREA, ds_next); + { + unsigned long ds_prev = 0, ds_next = 0; + + if (prev->ds_ctx) + ds_prev = (unsigned long)prev->ds_ctx->ds; + if (next->ds_ctx) + ds_next = (unsigned long)next->ds_ctx->ds; + + if (ds_next != ds_prev) { + /* + * We clear debugctl to make sure DS + * is not in use when we change it: + */ + debugctl = 0; + update_debugctlmsr(0); + wrmsrl(MSR_IA32_DS_AREA, ds_next); + } } #endif /* CONFIG_X86_DS */ -- cgit v0.10.2 From 63cc8c75156462d4b42cbdd76c293b7eee7ddbfe Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 12 May 2008 15:44:40 +0200 Subject: percpu: introduce DEFINE_PER_CPU_PAGE_ALIGNED() macro While examining holes in percpu section I found this : c05f5000 D per_cpu__current_task c05f5000 D __per_cpu_start c05f5004 D per_cpu__cpu_number c05f5008 D per_cpu__irq_regs c05f500c d per_cpu__cpu_devices c05f5040 D per_cpu__cyc2ns c05f6000 d per_cpu__cpuid4_info c05f6004 d per_cpu__cache_kobject c05f6008 d per_cpu__index_kobject c05f7000 D per_cpu__gdt_page This is because gdt_page is a percpu variable, defined with a page alignement, and linker is doing its job, two times because of .o nesting in the build process. I introduced a new macro DEFINE_PER_CPU_PAGE_ALIGNED() to avoid wasting this space. All page aligned variables (only one at this time) are put in a separate subsection .data.percpu.page_aligned, at the very begining of percpu zone. Before patch , on a x86_32 machine : .data.percpu 30232 3227471872 .data.percpu 22168 3227471872 Thats 8064 bytes saved for each CPU. Signed-off-by: Eric Dumazet Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index d0463a9..b2f54fa 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -21,7 +21,7 @@ #include "cpu.h" -DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { +DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index ce5ed08..0f7c29a 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S @@ -189,6 +189,7 @@ SECTIONS . = ALIGN(PAGE_SIZE); .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { __per_cpu_start = .; + *(.data.percpu.page_aligned) *(.data.percpu) *(.data.percpu.shared_aligned) __per_cpu_end = .; diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index f054778..69e5c11 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -348,6 +348,7 @@ . = ALIGN(align); \ __per_cpu_start = .; \ .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { \ + *(.data.percpu.page_aligned) \ *(.data.percpu) \ *(.data.percpu.shared_aligned) \ } \ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 4cdd393..2edacc8 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -23,12 +23,19 @@ __attribute__((__section__(SHARED_ALIGNED_SECTION))) \ PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name \ ____cacheline_aligned_in_smp + +#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \ + __attribute__((__section__(".data.percpu.page_aligned"))) \ + PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name #else #define DEFINE_PER_CPU(type, name) \ PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name #define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ DEFINE_PER_CPU(type, name) + +#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \ + DEFINE_PER_CPU(type, name) #endif #define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var) -- cgit v0.10.2 From 70ef6d595b6e51618a0cbe44b848d8c9db11a010 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Thu, 29 May 2008 18:41:04 +0800 Subject: x86: get irq for hpet timer HPET timer's IRQ is 0 by default. So we have to select which irq will be used by these timers. We wait to set the timer's irq until we really open it in order to reduce the chance of conflicting with other device. Signed-off-by: Kevin Hao Signed-off-by: Ingo Molnar diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c index e7fb0bc..c9bf5d4 100644 --- a/drivers/char/hpet.c +++ b/drivers/char/hpet.c @@ -184,6 +184,67 @@ static irqreturn_t hpet_interrupt(int irq, void *data) return IRQ_HANDLED; } +static void hpet_timer_set_irq(struct hpet_dev *devp) +{ + unsigned long v; + int irq, gsi; + struct hpet_timer __iomem *timer; + + spin_lock_irq(&hpet_lock); + if (devp->hd_hdwirq) { + spin_unlock_irq(&hpet_lock); + return; + } + + timer = devp->hd_timer; + + /* we prefer level triggered mode */ + v = readl(&timer->hpet_config); + if (!(v & Tn_INT_TYPE_CNF_MASK)) { + v |= Tn_INT_TYPE_CNF_MASK; + writel(v, &timer->hpet_config); + } + spin_unlock_irq(&hpet_lock); + + v = (readq(&timer->hpet_config) & Tn_INT_ROUTE_CAP_MASK) >> + Tn_INT_ROUTE_CAP_SHIFT; + + /* + * In PIC mode, skip IRQ0-4, IRQ6-9, IRQ12-15 which is always used by + * legacy device. In IO APIC mode, we skip all the legacy IRQS. + */ + if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) + v &= ~0xf3df; + else + v &= ~0xffff; + + for (irq = find_first_bit(&v, HPET_MAX_IRQ); irq < HPET_MAX_IRQ; + irq = find_next_bit(&v, HPET_MAX_IRQ, 1 + irq)) { + + if (irq >= NR_IRQS) { + irq = HPET_MAX_IRQ; + break; + } + + gsi = acpi_register_gsi(irq, ACPI_LEVEL_SENSITIVE, + ACPI_ACTIVE_LOW); + if (gsi > 0) + break; + + /* FIXME: Setup interrupt source table */ + } + + if (irq < HPET_MAX_IRQ) { + spin_lock_irq(&hpet_lock); + v = readl(&timer->hpet_config); + v |= irq << Tn_INT_ROUTE_CNF_SHIFT; + writel(v, &timer->hpet_config); + devp->hd_hdwirq = gsi; + spin_unlock_irq(&hpet_lock); + } + return; +} + static int hpet_open(struct inode *inode, struct file *file) { struct hpet_dev *devp; @@ -215,6 +276,8 @@ static int hpet_open(struct inode *inode, struct file *file) devp->hd_flags |= HPET_OPEN; spin_unlock_irq(&hpet_lock); + hpet_timer_set_irq(devp); + return 0; } diff --git a/include/linux/hpet.h b/include/linux/hpet.h index 2dc29ce..6d2626b 100644 --- a/include/linux/hpet.h +++ b/include/linux/hpet.h @@ -37,6 +37,7 @@ struct hpet { #define hpet_compare _u1._hpet_compare #define HPET_MAX_TIMERS (32) +#define HPET_MAX_IRQ (32) /* * HPET general capabilities register @@ -64,7 +65,7 @@ struct hpet { */ #define Tn_INT_ROUTE_CAP_MASK (0xffffffff00000000ULL) -#define Tn_INI_ROUTE_CAP_SHIFT (32UL) +#define Tn_INT_ROUTE_CAP_SHIFT (32UL) #define Tn_FSB_INT_DELCAP_MASK (0x8000UL) #define Tn_FSB_INT_DELCAP_SHIFT (15) #define Tn_FSB_EN_CNF_MASK (0x4000UL) -- cgit v0.10.2 From 3243b4ed817fad04e56e7b7dc78ec28dc8322ff2 Mon Sep 17 00:00:00 2001 From: Krzysztof Oledzki Date: Wed, 4 Jun 2008 03:40:17 +0200 Subject: x86: add another PCI ID for ICH6 force-hpet Tested on Asus P5GDC-V $ lspci -n -n |grep ISA 00:1f.0 ISA bridge [0601]: Intel Corporation 82801FB/FR (ICH6/ICH6R) LPC Interface Bridge [8086:2640] (rev 03) Force enabled HPET at base address 0xfed00000 hpet clockevent registered hpet0: at MMIO 0xfed00000, IRQs 2, 8, 0 hpet0: 3 64-bit timers, 14318180 Hz Signed-off-by: Krzysztof Piotr Oledzki Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index d89a648..06e1fd6 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -158,6 +158,8 @@ static void ich_force_enable_hpet(struct pci_dev *dev) DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB2_0, ich_force_enable_hpet); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_0, + ich_force_enable_hpet); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_1, ich_force_enable_hpet); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_0, -- cgit v0.10.2 From 59ea746337c69f6a5f1bc4d5e8544b3cbf12f801 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 12 Jun 2008 13:56:40 +0200 Subject: MM: virtual address debug Add some (configurable) expensive sanity checking to catch wrong address translations on x86. - create linux/mmdebug.h file to be able include this file in asm headers to not get unsolvable loops in header files - __phys_addr on x86_32 became a function in ioremap.c since PAGE_OFFSET, is_vmalloc_addr and VMALLOC_* non-constasts are undefined if declared in page_32.h - add __phys_addr_const for initializing doublefault_tss.__cr3 Tested on 386, 386pae, x86_64 and x86_64 numa=fake=2. Contains Andi's enable numa virtual address debug patch. Signed-off-by: Jiri Slaby Cc: Andi Kleen Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/doublefault_32.c b/arch/x86/kernel/doublefault_32.c index a47798b..395acb1 100644 --- a/arch/x86/kernel/doublefault_32.c +++ b/arch/x86/kernel/doublefault_32.c @@ -66,6 +66,6 @@ struct tss_struct doublefault_tss __cacheline_aligned = { .ds = __USER_DS, .fs = __KERNEL_PERCPU, - .__cr3 = __pa(swapper_pg_dir) + .__cr3 = __phys_addr_const((unsigned long)swapper_pg_dir) } }; diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 2b2bb3f..a78ffef 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -23,18 +23,26 @@ #ifdef CONFIG_X86_64 -unsigned long __phys_addr(unsigned long x) +static inline int phys_addr_valid(unsigned long addr) { - if (x >= __START_KERNEL_map) - return x - __START_KERNEL_map + phys_base; - return x - PAGE_OFFSET; + return addr < (1UL << boot_cpu_data.x86_phys_bits); } -EXPORT_SYMBOL(__phys_addr); -static inline int phys_addr_valid(unsigned long addr) +unsigned long __phys_addr(unsigned long x) { - return addr < (1UL << boot_cpu_data.x86_phys_bits); + if (x >= __START_KERNEL_map) { + x -= __START_KERNEL_map; + VIRTUAL_BUG_ON(x >= KERNEL_IMAGE_SIZE); + x += phys_base; + } else { + VIRTUAL_BUG_ON(x < PAGE_OFFSET); + x -= PAGE_OFFSET; + VIRTUAL_BUG_ON(system_state == SYSTEM_BOOTING ? x > MAXMEM : + !phys_addr_valid(x)); + } + return x; } +EXPORT_SYMBOL(__phys_addr); #else @@ -43,6 +51,15 @@ static inline int phys_addr_valid(unsigned long addr) return 1; } +unsigned long __phys_addr(unsigned long x) +{ + /* VMALLOC_* aren't constants; not available at the boot time */ + VIRTUAL_BUG_ON(x < PAGE_OFFSET || (system_state != SYSTEM_BOOTING && + is_vmalloc_addr((void *)x))); + return x - PAGE_OFFSET; +} +EXPORT_SYMBOL(__phys_addr); + #endif int page_is_ram(unsigned long pagenr) diff --git a/include/asm-x86/mmzone_64.h b/include/asm-x86/mmzone_64.h index 594bd0d..facde3e 100644 --- a/include/asm-x86/mmzone_64.h +++ b/include/asm-x86/mmzone_64.h @@ -7,7 +7,7 @@ #ifdef CONFIG_NUMA -#define VIRTUAL_BUG_ON(x) +#include #include diff --git a/include/asm-x86/page_32.h b/include/asm-x86/page_32.h index 424e82f..9159bfb 100644 --- a/include/asm-x86/page_32.h +++ b/include/asm-x86/page_32.h @@ -64,7 +64,8 @@ typedef struct page *pgtable_t; #endif #ifndef __ASSEMBLY__ -#define __phys_addr(x) ((x) - PAGE_OFFSET) +#define __phys_addr_const(x) ((x) - PAGE_OFFSET) +extern unsigned long __phys_addr(unsigned long); #define __phys_reloc_hide(x) RELOC_HIDE((x), 0) #ifdef CONFIG_FLATMEM diff --git a/include/linux/mm.h b/include/linux/mm.h index 586a943..3414a88 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -210,12 +211,6 @@ struct inode; */ #include -#ifdef CONFIG_DEBUG_VM -#define VM_BUG_ON(cond) BUG_ON(cond) -#else -#define VM_BUG_ON(condition) do { } while(0) -#endif - /* * Methods to modify the page usage count. * diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h new file mode 100644 index 0000000..860ed1a --- /dev/null +++ b/include/linux/mmdebug.h @@ -0,0 +1,18 @@ +#ifndef LINUX_MM_DEBUG_H +#define LINUX_MM_DEBUG_H 1 + +#include + +#ifdef CONFIG_DEBUG_VM +#define VM_BUG_ON(cond) BUG_ON(cond) +#else +#define VM_BUG_ON(cond) do { } while(0) +#endif + +#ifdef CONFIG_DEBUG_VIRTUAL +#define VIRTUAL_BUG_ON(cond) BUG_ON(cond) +#else +#define VIRTUAL_BUG_ON(cond) do { } while(0) +#endif + +#endif diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index d2099f4..9d9dc0d 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -469,6 +469,15 @@ config DEBUG_VM If unsure, say N. +config DEBUG_VIRTUAL + bool "Debug VM translations" + depends on DEBUG_KERNEL && X86 + help + Enable some costly sanity checks in virtual to page code. This can + catch mistakes with virt_to_page() and friends. + + If unsure, say N. + config DEBUG_WRITECOUNT bool "Debug filesystem writers count" depends on DEBUG_KERNEL diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 6e45b0f..dc41e9c 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -180,6 +180,11 @@ struct page *vmalloc_to_page(const void *vmalloc_addr) pmd_t *pmd; pte_t *ptep, pte; + /* XXX we might need to change this if we add VIRTUAL_BUG_ON for + * architectures that do not vmalloc module space */ + VIRTUAL_BUG_ON(!is_vmalloc_addr(vmalloc_addr) && + !is_module_address(addr)); + if (!pgd_none(*pgd)) { pud = pud_offset(pgd, addr); if (!pud_none(*pud)) { -- cgit v0.10.2 From a1bf9631be7332ce0641e299ddafad2d8223100f Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 12 Jun 2008 13:56:40 +0200 Subject: x86, MM: virtual address debug, v2 I've removed the test from phys_to_nid and made a function from __phys_addr only when the debugging is enabled (on x86_32). Signed-off-by: Jiri Slaby Cc: tglx@linutronix.de Cc: hpa@zytor.com Cc: Mike Travis Cc: Nick Piggin Cc: Cc: linux-mm@kvack.org Cc: Jiri Slaby Cc: Andi Kleen Signed-off-by: Ingo Molnar diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index a78ffef..9dd3cb9 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -51,6 +51,7 @@ static inline int phys_addr_valid(unsigned long addr) return 1; } +#ifdef CONFIG_DEBUG_VIRTUAL unsigned long __phys_addr(unsigned long x) { /* VMALLOC_* aren't constants; not available at the boot time */ @@ -59,6 +60,7 @@ unsigned long __phys_addr(unsigned long x) return x - PAGE_OFFSET; } EXPORT_SYMBOL(__phys_addr); +#endif #endif diff --git a/include/asm-x86/mmzone_64.h b/include/asm-x86/mmzone_64.h index facde3e..5e3a6cb 100644 --- a/include/asm-x86/mmzone_64.h +++ b/include/asm-x86/mmzone_64.h @@ -29,7 +29,6 @@ static inline __attribute__((pure)) int phys_to_nid(unsigned long addr) { unsigned nid; VIRTUAL_BUG_ON(!memnodemap); - VIRTUAL_BUG_ON((addr >> memnode_shift) >= memnodemapsize); nid = memnodemap[addr >> memnode_shift]; VIRTUAL_BUG_ON(nid >= MAX_NUMNODES || !node_data[nid]); return nid; diff --git a/include/asm-x86/page_32.h b/include/asm-x86/page_32.h index 9159bfb..71a2e42 100644 --- a/include/asm-x86/page_32.h +++ b/include/asm-x86/page_32.h @@ -65,7 +65,11 @@ typedef struct page *pgtable_t; #ifndef __ASSEMBLY__ #define __phys_addr_const(x) ((x) - PAGE_OFFSET) +#ifdef CONFIG_DEBUG_VIRTUAL extern unsigned long __phys_addr(unsigned long); +#else +#define __phys_addr(x) ((x) - PAGE_OFFSET) +#endif #define __phys_reloc_hide(x) RELOC_HIDE((x), 0) #ifdef CONFIG_FLATMEM -- cgit v0.10.2 From 7aa413def76146f7b3784228556d9e4bc562eab3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 19 Jun 2008 13:28:11 +0200 Subject: x86, MM: virtual address debug, cleanups Signed-off-by: Ingo Molnar diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h index 860ed1a..8a55098 100644 --- a/include/linux/mmdebug.h +++ b/include/linux/mmdebug.h @@ -6,13 +6,13 @@ #ifdef CONFIG_DEBUG_VM #define VM_BUG_ON(cond) BUG_ON(cond) #else -#define VM_BUG_ON(cond) do { } while(0) +#define VM_BUG_ON(cond) do { } while (0) #endif #ifdef CONFIG_DEBUG_VIRTUAL #define VIRTUAL_BUG_ON(cond) BUG_ON(cond) #else -#define VIRTUAL_BUG_ON(cond) do { } while(0) +#define VIRTUAL_BUG_ON(cond) do { } while (0) #endif #endif diff --git a/mm/vmalloc.c b/mm/vmalloc.c index dc41e9c..830a558 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -180,8 +180,10 @@ struct page *vmalloc_to_page(const void *vmalloc_addr) pmd_t *pmd; pte_t *ptep, pte; - /* XXX we might need to change this if we add VIRTUAL_BUG_ON for - * architectures that do not vmalloc module space */ + /* + * XXX we might need to change this if we add VIRTUAL_BUG_ON for + * architectures that do not vmalloc module space + */ VIRTUAL_BUG_ON(!is_vmalloc_addr(vmalloc_addr) && !is_module_address(addr)); -- cgit v0.10.2 From f3561810b163aca5388ad550abbbc82ae5323189 Mon Sep 17 00:00:00 2001 From: Joe Buehler Date: Mon, 9 Jun 2008 08:55:20 -0400 Subject: x86: add PCI ID for 6300ESB force hpet 00:1f.0 ISA bridge: Intel Corporation 6300ESB LPC Interface Controller (rev 02) 00:1f.0 Class 0601: 8086:25a1 (rev 02) kernel: pci 0000:00:1f.0: Force enabled HPET at 0xfed00000 kernel: hpet clockevent registered kernel: hpet0: at MMIO 0xfed00000, IRQs 2, 8, 0 kernel: hpet0: 3 64-bit timers, 14318180 Hz Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index 06e1fd6..f327aba 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -257,6 +257,8 @@ static void old_ich_force_enable_hpet_user(struct pci_dev *dev) old_ich_force_enable_hpet(dev); } +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB_1, + old_ich_force_enable_hpet_user); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_0, old_ich_force_enable_hpet_user); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_12, -- cgit v0.10.2 From e61d98d8dad0048619bb138b0ff996422ffae53b Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:35 -0700 Subject: x64, x2apic/intr-remap: Intel vt-d, IOMMU code reorganization code reorganization of the generic Intel vt-d parsing related routines and linux iommu routines specific to Intel vt-d. drivers/pci/dmar.c now contains the generic vt-d parsing related routines drivers/pci/intel_iommu.c contains the iommu routines specific to vt-d Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar diff --git a/drivers/pci/dma_remapping.h b/drivers/pci/dma_remapping.h new file mode 100644 index 0000000..05aac8e --- /dev/null +++ b/drivers/pci/dma_remapping.h @@ -0,0 +1,155 @@ +#ifndef _DMA_REMAPPING_H +#define _DMA_REMAPPING_H + +/* + * We need a fixed PAGE_SIZE of 4K irrespective of + * arch PAGE_SIZE for IOMMU page tables. + */ +#define PAGE_SHIFT_4K (12) +#define PAGE_SIZE_4K (1UL << PAGE_SHIFT_4K) +#define PAGE_MASK_4K (((u64)-1) << PAGE_SHIFT_4K) +#define PAGE_ALIGN_4K(addr) (((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K) + +#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT_4K) +#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK) +#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK) + + +/* + * 0: Present + * 1-11: Reserved + * 12-63: Context Ptr (12 - (haw-1)) + * 64-127: Reserved + */ +struct root_entry { + u64 val; + u64 rsvd1; +}; +#define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry)) +static inline bool root_present(struct root_entry *root) +{ + return (root->val & 1); +} +static inline void set_root_present(struct root_entry *root) +{ + root->val |= 1; +} +static inline void set_root_value(struct root_entry *root, unsigned long value) +{ + root->val |= value & PAGE_MASK_4K; +} + +struct context_entry; +static inline struct context_entry * +get_context_addr_from_root(struct root_entry *root) +{ + return (struct context_entry *) + (root_present(root)?phys_to_virt( + root->val & PAGE_MASK_4K): + NULL); +} + +/* + * low 64 bits: + * 0: present + * 1: fault processing disable + * 2-3: translation type + * 12-63: address space root + * high 64 bits: + * 0-2: address width + * 3-6: aval + * 8-23: domain id + */ +struct context_entry { + u64 lo; + u64 hi; +}; +#define context_present(c) ((c).lo & 1) +#define context_fault_disable(c) (((c).lo >> 1) & 1) +#define context_translation_type(c) (((c).lo >> 2) & 3) +#define context_address_root(c) ((c).lo & PAGE_MASK_4K) +#define context_address_width(c) ((c).hi & 7) +#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1)) + +#define context_set_present(c) do {(c).lo |= 1;} while (0) +#define context_set_fault_enable(c) \ + do {(c).lo &= (((u64)-1) << 2) | 1;} while (0) +#define context_set_translation_type(c, val) \ + do { \ + (c).lo &= (((u64)-1) << 4) | 3; \ + (c).lo |= ((val) & 3) << 2; \ + } while (0) +#define CONTEXT_TT_MULTI_LEVEL 0 +#define context_set_address_root(c, val) \ + do {(c).lo |= (val) & PAGE_MASK_4K;} while (0) +#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0) +#define context_set_domain_id(c, val) \ + do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0) +#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0) + +/* + * 0: readable + * 1: writable + * 2-6: reserved + * 7: super page + * 8-11: available + * 12-63: Host physcial address + */ +struct dma_pte { + u64 val; +}; +#define dma_clear_pte(p) do {(p).val = 0;} while (0) + +#define DMA_PTE_READ (1) +#define DMA_PTE_WRITE (2) + +#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0) +#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0) +#define dma_set_pte_prot(p, prot) \ + do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0) +#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K) +#define dma_set_pte_addr(p, addr) do {\ + (p).val |= ((addr) & PAGE_MASK_4K); } while (0) +#define dma_pte_present(p) (((p).val & 3) != 0) + +struct intel_iommu; + +struct dmar_domain { + int id; /* domain id */ + struct intel_iommu *iommu; /* back pointer to owning iommu */ + + struct list_head devices; /* all devices' list */ + struct iova_domain iovad; /* iova's that belong to this domain */ + + struct dma_pte *pgd; /* virtual address */ + spinlock_t mapping_lock; /* page table lock */ + int gaw; /* max guest address width */ + + /* adjusted guest address width, 0 is level 2 30-bit */ + int agaw; + +#define DOMAIN_FLAG_MULTIPLE_DEVICES 1 + int flags; +}; + +/* PCI domain-device relationship */ +struct device_domain_info { + struct list_head link; /* link to domain siblings */ + struct list_head global; /* link to global list */ + u8 bus; /* PCI bus numer */ + u8 devfn; /* PCI devfn number */ + struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */ + struct dmar_domain *domain; /* pointer to domain */ +}; + +extern int init_dmars(void); +extern void free_dmar_iommu(struct intel_iommu *iommu); + +#ifndef CONFIG_DMAR_GFX_WA +static inline void iommu_prepare_gfx_mapping(void) +{ + return; +} +#endif /* !CONFIG_DMAR_GFX_WA */ + +#endif diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index f941f60..c00e387 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -19,9 +19,11 @@ * Author: Shaohua Li * Author: Anil S Keshavamurthy * - * This file implements early detection/parsing of DMA Remapping Devices + * This file implements early detection/parsing of Remapping Devices * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI * tables. + * + * These routines are used by both DMA-remapping and Interrupt-remapping */ #include @@ -300,6 +302,37 @@ parse_dmar_table(void) return ret; } +int dmar_pci_device_match(struct pci_dev *devices[], int cnt, + struct pci_dev *dev) +{ + int index; + + while (dev) { + for (index = 0; index < cnt; index++) + if (dev == devices[index]) + return 1; + + /* Check our parent */ + dev = dev->bus->self; + } + + return 0; +} + +struct dmar_drhd_unit * +dmar_find_matched_drhd_unit(struct pci_dev *dev) +{ + struct dmar_drhd_unit *drhd = NULL; + + list_for_each_entry(drhd, &dmar_drhd_units, list) { + if (drhd->include_all || dmar_pci_device_match(drhd->devices, + drhd->devices_cnt, dev)) + return drhd; + } + + return NULL; +} + int __init dmar_table_init(void) { @@ -343,3 +376,58 @@ int __init early_dmar_detect(void) return (ACPI_SUCCESS(status) ? 1 : 0); } + +struct intel_iommu *alloc_iommu(struct intel_iommu *iommu, + struct dmar_drhd_unit *drhd) +{ + int map_size; + u32 ver; + + iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K); + if (!iommu->reg) { + printk(KERN_ERR "IOMMU: can't map the region\n"); + goto error; + } + iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG); + iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG); + + /* the registers might be more than one page */ + map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap), + cap_max_fault_reg_offset(iommu->cap)); + map_size = PAGE_ALIGN_4K(map_size); + if (map_size > PAGE_SIZE_4K) { + iounmap(iommu->reg); + iommu->reg = ioremap(drhd->reg_base_addr, map_size); + if (!iommu->reg) { + printk(KERN_ERR "IOMMU: can't map the region\n"); + goto error; + } + } + + ver = readl(iommu->reg + DMAR_VER_REG); + pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n", + drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver), + iommu->cap, iommu->ecap); + + spin_lock_init(&iommu->register_lock); + + drhd->iommu = iommu; + return iommu; +error: + kfree(iommu); + return NULL; +} + +void free_iommu(struct intel_iommu *iommu) +{ + if (!iommu) + return; + +#ifdef CONFIG_DMAR + free_dmar_iommu(iommu); +#endif + + if (iommu->reg) + iounmap(iommu->reg); + kfree(iommu); +} diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index bb06423..1c0270d 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -990,6 +990,8 @@ static int iommu_init_domains(struct intel_iommu *iommu) return -ENOMEM; } + spin_lock_init(&iommu->lock); + /* * if Caching mode is set, then invalid translations are tagged * with domainid 0. Hence we need to pre-allocate it. @@ -998,62 +1000,15 @@ static int iommu_init_domains(struct intel_iommu *iommu) set_bit(0, iommu->domain_ids); return 0; } -static struct intel_iommu *alloc_iommu(struct intel_iommu *iommu, - struct dmar_drhd_unit *drhd) -{ - int ret; - int map_size; - u32 ver; - - iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K); - if (!iommu->reg) { - printk(KERN_ERR "IOMMU: can't map the region\n"); - goto error; - } - iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG); - iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG); - - /* the registers might be more than one page */ - map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap), - cap_max_fault_reg_offset(iommu->cap)); - map_size = PAGE_ALIGN_4K(map_size); - if (map_size > PAGE_SIZE_4K) { - iounmap(iommu->reg); - iommu->reg = ioremap(drhd->reg_base_addr, map_size); - if (!iommu->reg) { - printk(KERN_ERR "IOMMU: can't map the region\n"); - goto error; - } - } - ver = readl(iommu->reg + DMAR_VER_REG); - pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n", - drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver), - iommu->cap, iommu->ecap); - ret = iommu_init_domains(iommu); - if (ret) - goto error_unmap; - spin_lock_init(&iommu->lock); - spin_lock_init(&iommu->register_lock); - - drhd->iommu = iommu; - return iommu; -error_unmap: - iounmap(iommu->reg); -error: - kfree(iommu); - return NULL; -} static void domain_exit(struct dmar_domain *domain); -static void free_iommu(struct intel_iommu *iommu) + +void free_dmar_iommu(struct intel_iommu *iommu) { struct dmar_domain *domain; int i; - if (!iommu) - return; - i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap)); for (; i < cap_ndoms(iommu->cap); ) { domain = iommu->domains[i]; @@ -1078,10 +1033,6 @@ static void free_iommu(struct intel_iommu *iommu) /* free context mapping */ free_context_table(iommu); - - if (iommu->reg) - iounmap(iommu->reg); - kfree(iommu); } static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu) @@ -1426,37 +1377,6 @@ find_domain(struct pci_dev *pdev) return NULL; } -static int dmar_pci_device_match(struct pci_dev *devices[], int cnt, - struct pci_dev *dev) -{ - int index; - - while (dev) { - for (index = 0; index < cnt; index++) - if (dev == devices[index]) - return 1; - - /* Check our parent */ - dev = dev->bus->self; - } - - return 0; -} - -static struct dmar_drhd_unit * -dmar_find_matched_drhd_unit(struct pci_dev *dev) -{ - struct dmar_drhd_unit *drhd = NULL; - - list_for_each_entry(drhd, &dmar_drhd_units, list) { - if (drhd->include_all || dmar_pci_device_match(drhd->devices, - drhd->devices_cnt, dev)) - return drhd; - } - - return NULL; -} - /* domain is initialized */ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw) { @@ -1764,6 +1684,10 @@ int __init init_dmars(void) goto error; } + ret = iommu_init_domains(iommu); + if (ret) + goto error; + /* * TBD: * we could share the same root & context tables diff --git a/drivers/pci/intel-iommu.h b/drivers/pci/intel-iommu.h index afc0ad9..9e5e98c 100644 --- a/drivers/pci/intel-iommu.h +++ b/drivers/pci/intel-iommu.h @@ -27,19 +27,7 @@ #include #include "iova.h" #include - -/* - * We need a fixed PAGE_SIZE of 4K irrespective of - * arch PAGE_SIZE for IOMMU page tables. - */ -#define PAGE_SHIFT_4K (12) -#define PAGE_SIZE_4K (1UL << PAGE_SHIFT_4K) -#define PAGE_MASK_4K (((u64)-1) << PAGE_SHIFT_4K) -#define PAGE_ALIGN_4K(addr) (((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K) - -#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT_4K) -#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK) -#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK) +#include "dma_remapping.h" /* * Intel IOMMU register specification per version 1.0 public spec. @@ -187,158 +175,31 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) #define dma_frcd_source_id(c) (c & 0xffff) #define dma_frcd_page_addr(d) (d & (((u64)-1) << 12)) /* low 64 bit */ -/* - * 0: Present - * 1-11: Reserved - * 12-63: Context Ptr (12 - (haw-1)) - * 64-127: Reserved - */ -struct root_entry { - u64 val; - u64 rsvd1; -}; -#define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry)) -static inline bool root_present(struct root_entry *root) -{ - return (root->val & 1); -} -static inline void set_root_present(struct root_entry *root) -{ - root->val |= 1; -} -static inline void set_root_value(struct root_entry *root, unsigned long value) -{ - root->val |= value & PAGE_MASK_4K; -} - -struct context_entry; -static inline struct context_entry * -get_context_addr_from_root(struct root_entry *root) -{ - return (struct context_entry *) - (root_present(root)?phys_to_virt( - root->val & PAGE_MASK_4K): - NULL); -} - -/* - * low 64 bits: - * 0: present - * 1: fault processing disable - * 2-3: translation type - * 12-63: address space root - * high 64 bits: - * 0-2: address width - * 3-6: aval - * 8-23: domain id - */ -struct context_entry { - u64 lo; - u64 hi; -}; -#define context_present(c) ((c).lo & 1) -#define context_fault_disable(c) (((c).lo >> 1) & 1) -#define context_translation_type(c) (((c).lo >> 2) & 3) -#define context_address_root(c) ((c).lo & PAGE_MASK_4K) -#define context_address_width(c) ((c).hi & 7) -#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1)) - -#define context_set_present(c) do {(c).lo |= 1;} while (0) -#define context_set_fault_enable(c) \ - do {(c).lo &= (((u64)-1) << 2) | 1;} while (0) -#define context_set_translation_type(c, val) \ - do { \ - (c).lo &= (((u64)-1) << 4) | 3; \ - (c).lo |= ((val) & 3) << 2; \ - } while (0) -#define CONTEXT_TT_MULTI_LEVEL 0 -#define context_set_address_root(c, val) \ - do {(c).lo |= (val) & PAGE_MASK_4K;} while (0) -#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0) -#define context_set_domain_id(c, val) \ - do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0) -#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0) - -/* - * 0: readable - * 1: writable - * 2-6: reserved - * 7: super page - * 8-11: available - * 12-63: Host physcial address - */ -struct dma_pte { - u64 val; -}; -#define dma_clear_pte(p) do {(p).val = 0;} while (0) - -#define DMA_PTE_READ (1) -#define DMA_PTE_WRITE (2) - -#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0) -#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0) -#define dma_set_pte_prot(p, prot) \ - do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0) -#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K) -#define dma_set_pte_addr(p, addr) do {\ - (p).val |= ((addr) & PAGE_MASK_4K); } while (0) -#define dma_pte_present(p) (((p).val & 3) != 0) - -struct intel_iommu; - -struct dmar_domain { - int id; /* domain id */ - struct intel_iommu *iommu; /* back pointer to owning iommu */ - - struct list_head devices; /* all devices' list */ - struct iova_domain iovad; /* iova's that belong to this domain */ - - struct dma_pte *pgd; /* virtual address */ - spinlock_t mapping_lock; /* page table lock */ - int gaw; /* max guest address width */ - - /* adjusted guest address width, 0 is level 2 30-bit */ - int agaw; - -#define DOMAIN_FLAG_MULTIPLE_DEVICES 1 - int flags; -}; - -/* PCI domain-device relationship */ -struct device_domain_info { - struct list_head link; /* link to domain siblings */ - struct list_head global; /* link to global list */ - u8 bus; /* PCI bus numer */ - u8 devfn; /* PCI devfn number */ - struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */ - struct dmar_domain *domain; /* pointer to domain */ -}; - -extern int init_dmars(void); - struct intel_iommu { void __iomem *reg; /* Pointer to hardware regs, virtual addr */ u64 cap; u64 ecap; - unsigned long *domain_ids; /* bitmap of domains */ - struct dmar_domain **domains; /* ptr to domains */ int seg; u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */ - spinlock_t lock; /* protect context, domain ids */ spinlock_t register_lock; /* protect register handling */ + +#ifdef CONFIG_DMAR + unsigned long *domain_ids; /* bitmap of domains */ + struct dmar_domain **domains; /* ptr to domains */ + spinlock_t lock; /* protect context, domain ids */ struct root_entry *root_entry; /* virtual address */ unsigned int irq; unsigned char name[7]; /* Device Name */ struct msi_msg saved_msg; struct sys_device sysdev; +#endif }; -#ifndef CONFIG_DMAR_GFX_WA -static inline void iommu_prepare_gfx_mapping(void) -{ - return; -} -#endif /* !CONFIG_DMAR_GFX_WA */ +extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev); + +extern struct intel_iommu *alloc_iommu(struct intel_iommu *iommu, + struct dmar_drhd_unit *drhd); +extern void free_iommu(struct intel_iommu *iommu); #endif -- cgit v0.10.2 From c42d9f32443397aed2d37d37df161392e6a5862f Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:36 -0700 Subject: x64, x2apic/intr-remap: fix the need for sequential array allocation of iommus Clean up the intel-iommu code related to deferred iommu flush logic. There is no need to allocate all the iommu's as a sequential array. This will be used later in the interrupt-remapping patch series to allocate iommu much early and individually for each device remapping hardware unit. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index c00e387..1a59423 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -377,11 +377,18 @@ int __init early_dmar_detect(void) return (ACPI_SUCCESS(status) ? 1 : 0); } -struct intel_iommu *alloc_iommu(struct intel_iommu *iommu, - struct dmar_drhd_unit *drhd) +struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd) { + struct intel_iommu *iommu; int map_size; u32 ver; + static int iommu_allocated = 0; + + iommu = kzalloc(sizeof(*iommu), GFP_KERNEL); + if (!iommu) + return NULL; + + iommu->seq_id = iommu_allocated++; iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K); if (!iommu->reg) { diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 1c0270d..4d59a6a 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -58,8 +58,6 @@ static void flush_unmaps_timeout(unsigned long data); DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0); -static struct intel_iommu *g_iommus; - #define HIGH_WATER_MARK 250 struct deferred_flush_tables { int next; @@ -1649,8 +1647,6 @@ int __init init_dmars(void) * endfor */ for_each_drhd_unit(drhd) { - if (drhd->ignored) - continue; g_num_of_iommus++; /* * lock not needed as this is only incremented in the single @@ -1659,26 +1655,17 @@ int __init init_dmars(void) */ } - g_iommus = kzalloc(g_num_of_iommus * sizeof(*iommu), GFP_KERNEL); - if (!g_iommus) { - ret = -ENOMEM; - goto error; - } - deferred_flush = kzalloc(g_num_of_iommus * sizeof(struct deferred_flush_tables), GFP_KERNEL); if (!deferred_flush) { - kfree(g_iommus); ret = -ENOMEM; goto error; } - i = 0; for_each_drhd_unit(drhd) { if (drhd->ignored) continue; - iommu = alloc_iommu(&g_iommus[i], drhd); - i++; + iommu = alloc_iommu(drhd); if (!iommu) { ret = -ENOMEM; goto error; @@ -1770,7 +1757,6 @@ error: iommu = drhd->iommu; free_iommu(iommu); } - kfree(g_iommus); return ret; } @@ -1927,7 +1913,10 @@ static void flush_unmaps(void) /* just flush them all */ for (i = 0; i < g_num_of_iommus; i++) { if (deferred_flush[i].next) { - iommu_flush_iotlb_global(&g_iommus[i], 0); + struct intel_iommu *iommu = + deferred_flush[i].domain[0]->iommu; + + iommu_flush_iotlb_global(iommu, 0); for (j = 0; j < deferred_flush[i].next; j++) { __free_iova(&deferred_flush[i].domain[j]->iovad, deferred_flush[i].iova[j]); @@ -1957,7 +1946,8 @@ static void add_unmap(struct dmar_domain *dom, struct iova *iova) if (list_size == HIGH_WATER_MARK) flush_unmaps(); - iommu_id = dom->iommu - g_iommus; + iommu_id = dom->iommu->seq_id; + next = deferred_flush[iommu_id].next; deferred_flush[iommu_id].domain[next] = dom; deferred_flush[iommu_id].iova[next] = iova; diff --git a/drivers/pci/intel-iommu.h b/drivers/pci/intel-iommu.h index 9e5e98c..75c63f65 100644 --- a/drivers/pci/intel-iommu.h +++ b/drivers/pci/intel-iommu.h @@ -182,6 +182,7 @@ struct intel_iommu { int seg; u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */ spinlock_t register_lock; /* protect register handling */ + int seq_id; /* sequence id of the iommu */ #ifdef CONFIG_DMAR unsigned long *domain_ids; /* bitmap of domains */ @@ -198,8 +199,7 @@ struct intel_iommu { extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev); -extern struct intel_iommu *alloc_iommu(struct intel_iommu *iommu, - struct dmar_drhd_unit *drhd); +extern struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd); extern void free_iommu(struct intel_iommu *iommu); #endif -- cgit v0.10.2 From 1886e8a90a580f3ad343f2065c84c1b9e1dac9ef Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:37 -0700 Subject: x64, x2apic/intr-remap: code re-structuring, to be used by both DMA and Interrupt remapping Allocate the iommu during the parse of DMA remapping hardware definition structures. And also, introduce routines for device scope initialization which will be explicitly called during dma-remapping initialization. These will be used for enabling interrupt remapping separately from the existing DMA-remapping enabling sequence. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 1a59423..158bc5b 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -174,19 +174,37 @@ dmar_parse_one_drhd(struct acpi_dmar_header *header) struct acpi_dmar_hardware_unit *drhd; struct dmar_drhd_unit *dmaru; int ret = 0; - static int include_all; dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL); if (!dmaru) return -ENOMEM; + dmaru->hdr = header; drhd = (struct acpi_dmar_hardware_unit *)header; dmaru->reg_base_addr = drhd->address; dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */ + ret = alloc_iommu(dmaru); + if (ret) { + kfree(dmaru); + return ret; + } + dmar_register_drhd_unit(dmaru); + return 0; +} + +static int __init +dmar_parse_dev(struct dmar_drhd_unit *dmaru) +{ + struct acpi_dmar_hardware_unit *drhd; + static int include_all; + int ret; + + drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr; + if (!dmaru->include_all) ret = dmar_parse_dev_scope((void *)(drhd + 1), - ((void *)drhd) + header->length, + ((void *)drhd) + drhd->header.length, &dmaru->devices_cnt, &dmaru->devices, drhd->segment); else { @@ -199,10 +217,10 @@ dmar_parse_one_drhd(struct acpi_dmar_header *header) include_all = 1; } - if (ret || (dmaru->devices_cnt == 0 && !dmaru->include_all)) + if (ret || (dmaru->devices_cnt == 0 && !dmaru->include_all)) { + list_del(&dmaru->list); kfree(dmaru); - else - dmar_register_drhd_unit(dmaru); + } return ret; } @@ -211,23 +229,35 @@ dmar_parse_one_rmrr(struct acpi_dmar_header *header) { struct acpi_dmar_reserved_memory *rmrr; struct dmar_rmrr_unit *rmrru; - int ret = 0; rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL); if (!rmrru) return -ENOMEM; + rmrru->hdr = header; rmrr = (struct acpi_dmar_reserved_memory *)header; rmrru->base_address = rmrr->base_address; rmrru->end_address = rmrr->end_address; + + dmar_register_rmrr_unit(rmrru); + return 0; +} + +static int __init +rmrr_parse_dev(struct dmar_rmrr_unit *rmrru) +{ + struct acpi_dmar_reserved_memory *rmrr; + int ret; + + rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr; ret = dmar_parse_dev_scope((void *)(rmrr + 1), - ((void *)rmrr) + header->length, + ((void *)rmrr) + rmrr->header.length, &rmrru->devices_cnt, &rmrru->devices, rmrr->segment); - if (ret || (rmrru->devices_cnt == 0)) + if (ret || (rmrru->devices_cnt == 0)) { + list_del(&rmrru->list); kfree(rmrru); - else - dmar_register_rmrr_unit(rmrru); + } return ret; } @@ -333,15 +363,42 @@ dmar_find_matched_drhd_unit(struct pci_dev *dev) return NULL; } +int __init dmar_dev_scope_init(void) +{ + struct dmar_drhd_unit *drhd; + struct dmar_rmrr_unit *rmrr; + int ret = -ENODEV; + + for_each_drhd_unit(drhd) { + ret = dmar_parse_dev(drhd); + if (ret) + return ret; + } + + for_each_rmrr_units(rmrr) { + ret = rmrr_parse_dev(rmrr); + if (ret) + return ret; + } + + return ret; +} + int __init dmar_table_init(void) { - + static int dmar_table_initialized; int ret; + if (dmar_table_initialized) + return 0; + + dmar_table_initialized = 1; + ret = parse_dmar_table(); if (ret) { - printk(KERN_INFO PREFIX "parse DMAR table failure.\n"); + if (ret != -ENODEV) + printk(KERN_INFO PREFIX "parse DMAR table failure.\n"); return ret; } @@ -377,7 +434,7 @@ int __init early_dmar_detect(void) return (ACPI_SUCCESS(status) ? 1 : 0); } -struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd) +int alloc_iommu(struct dmar_drhd_unit *drhd) { struct intel_iommu *iommu; int map_size; @@ -386,7 +443,7 @@ struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd) iommu = kzalloc(sizeof(*iommu), GFP_KERNEL); if (!iommu) - return NULL; + return -ENOMEM; iommu->seq_id = iommu_allocated++; @@ -419,10 +476,10 @@ struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd) spin_lock_init(&iommu->register_lock); drhd->iommu = iommu; - return iommu; + return 0; error: kfree(iommu); - return NULL; + return -1; } void free_iommu(struct intel_iommu *iommu) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 4d59a6a..218a1f3 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1665,11 +1665,8 @@ int __init init_dmars(void) for_each_drhd_unit(drhd) { if (drhd->ignored) continue; - iommu = alloc_iommu(drhd); - if (!iommu) { - ret = -ENOMEM; - goto error; - } + + iommu = drhd->iommu; ret = iommu_init_domains(iommu); if (ret) @@ -2324,6 +2321,9 @@ int __init intel_iommu_init(void) if (dmar_table_init()) return -ENODEV; + if (dmar_dev_scope_init()) + return -ENODEV; + iommu_init_mempool(); dmar_init_reserved_ranges(); diff --git a/drivers/pci/intel-iommu.h b/drivers/pci/intel-iommu.h index 75c63f65..371e3b9 100644 --- a/drivers/pci/intel-iommu.h +++ b/drivers/pci/intel-iommu.h @@ -199,7 +199,7 @@ struct intel_iommu { extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev); -extern struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd); +extern int alloc_iommu(struct dmar_drhd_unit *drhd); extern void free_iommu(struct intel_iommu *iommu); #endif diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 56c73b8..3ab07e4 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -46,12 +46,14 @@ extern int intel_iommu_init(void); extern int dmar_table_init(void); extern int early_dmar_detect(void); +extern int dmar_dev_scope_init(void); extern struct list_head dmar_drhd_units; extern struct list_head dmar_rmrr_units; struct dmar_drhd_unit { struct list_head list; /* list of drhd units */ + struct acpi_dmar_header *hdr; /* ACPI header */ u64 reg_base_addr; /* register base address*/ struct pci_dev **devices; /* target device array */ int devices_cnt; /* target device count */ @@ -62,6 +64,7 @@ struct dmar_drhd_unit { struct dmar_rmrr_unit { struct list_head list; /* list of rmrr units */ + struct acpi_dmar_header *hdr; /* ACPI header */ u64 base_address; /* reserved base address*/ u64 end_address; /* reserved end address */ struct pci_dev **devices; /* target devices */ @@ -72,6 +75,8 @@ struct dmar_rmrr_unit { list_for_each_entry(drhd, &dmar_drhd_units, list) #define for_each_rmrr_units(rmrr) \ list_for_each_entry(rmrr, &dmar_rmrr_units, list) + +extern int alloc_iommu(struct dmar_drhd_unit *); #else static inline void detect_intel_iommu(void) { @@ -81,6 +86,9 @@ static inline int intel_iommu_init(void) { return -ENODEV; } - +static inline int dmar_table_init(void) +{ + return -ENODEV; +} #endif /* !CONFIG_DMAR */ #endif /* __DMAR_H__ */ -- cgit v0.10.2 From aaa9d1dd63bf89b62f4ea9f46de376ab1a3fbc6c Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:38 -0700 Subject: x64, x2apic/intr-remap: use CONFIG_DMAR for DMA-remapping specific code DMA remapping specific code covered with CONFIG_DMAR in the generic code which will also be used later for enabling Interrupt-remapping. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 158bc5b..5c99f99 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -39,7 +39,6 @@ * these units are not supported by the architecture. */ LIST_HEAD(dmar_drhd_units); -LIST_HEAD(dmar_rmrr_units); static struct acpi_table_header * __initdata dmar_tbl; @@ -55,11 +54,6 @@ static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd) list_add(&drhd->list, &dmar_drhd_units); } -static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr) -{ - list_add(&rmrr->list, &dmar_rmrr_units); -} - static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope, struct pci_dev **dev, u16 segment) { @@ -224,6 +218,15 @@ dmar_parse_dev(struct dmar_drhd_unit *dmaru) return ret; } +#ifdef CONFIG_DMAR +LIST_HEAD(dmar_rmrr_units); + +static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr) +{ + list_add(&rmrr->list, &dmar_rmrr_units); +} + + static int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header) { @@ -260,6 +263,7 @@ rmrr_parse_dev(struct dmar_rmrr_unit *rmrru) } return ret; } +#endif static void __init dmar_table_print_dmar_entry(struct acpi_dmar_header *header) @@ -284,6 +288,7 @@ dmar_table_print_dmar_entry(struct acpi_dmar_header *header) } } + /** * parse_dmar_table - parses the DMA reporting table */ @@ -316,7 +321,9 @@ parse_dmar_table(void) ret = dmar_parse_one_drhd(entry_header); break; case ACPI_DMAR_TYPE_RESERVED_MEMORY: +#ifdef CONFIG_DMAR ret = dmar_parse_one_rmrr(entry_header); +#endif break; default: printk(KERN_WARNING PREFIX @@ -366,7 +373,6 @@ dmar_find_matched_drhd_unit(struct pci_dev *dev) int __init dmar_dev_scope_init(void) { struct dmar_drhd_unit *drhd; - struct dmar_rmrr_unit *rmrr; int ret = -ENODEV; for_each_drhd_unit(drhd) { @@ -375,11 +381,16 @@ int __init dmar_dev_scope_init(void) return ret; } - for_each_rmrr_units(rmrr) { - ret = rmrr_parse_dev(rmrr); - if (ret) - return ret; +#ifdef CONFIG_DMAR + { + struct dmar_rmrr_unit *rmrr; + for_each_rmrr_units(rmrr) { + ret = rmrr_parse_dev(rmrr); + if (ret) + return ret; + } } +#endif return ret; } @@ -407,10 +418,12 @@ int __init dmar_table_init(void) return -ENODEV; } +#ifdef CONFIG_DMAR if (list_empty(&dmar_rmrr_units)) { printk(KERN_INFO PREFIX "No RMRR found\n"); return -ENODEV; } +#endif return 0; } -- cgit v0.10.2 From 2d6b5f85bb4ca919d8ab0f30311309b53fb93bc3 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:39 -0700 Subject: x64, x2apic/intr-remap: Fix the need for RMRR in the DMA-remapping detection Presence of RMRR structures is not compulsory for enabling DMA-remapping. Signed-off-by: Suresh Siddha Signed-off-by: Yong Y Wang Cc: Yong Y Wang Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 5c99f99..903f170 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -419,10 +419,8 @@ int __init dmar_table_init(void) } #ifdef CONFIG_DMAR - if (list_empty(&dmar_rmrr_units)) { + if (list_empty(&dmar_rmrr_units)) printk(KERN_INFO PREFIX "No RMRR found\n"); - return -ENODEV; - } #endif return 0; -- cgit v0.10.2 From ad3ad3f6a2caebf56869b83b69e23eb9fa5e0ab6 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:40 -0700 Subject: x64, x2apic/intr-remap: parse ioapic scope under vt-d structures Parse the vt-d device scope structures to find the mapping between IO-APICs and the interrupt remapping hardware units. This will be used later for enabling Interrupt-remapping for IOAPIC devices. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile index 4d1ce2e..1c409c7 100644 --- a/drivers/pci/Makefile +++ b/drivers/pci/Makefile @@ -26,6 +26,8 @@ obj-$(CONFIG_HT_IRQ) += htirq.o # Build Intel IOMMU support obj-$(CONFIG_DMAR) += dmar.o iova.o intel-iommu.o +obj-$(CONFIG_INTR_REMAP) += dmar.o intr_remapping.o + # # Some architectures use the generic PCI setup functions # diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 903f170..127764c 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -423,6 +423,9 @@ int __init dmar_table_init(void) printk(KERN_INFO PREFIX "No RMRR found\n"); #endif +#ifdef CONFIG_INTR_REMAP + parse_ioapics_under_ir(); +#endif return 0; } diff --git a/drivers/pci/intel-iommu.h b/drivers/pci/intel-iommu.h index 371e3b9..eb167e3 100644 --- a/drivers/pci/intel-iommu.h +++ b/drivers/pci/intel-iommu.h @@ -114,6 +114,8 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) #define ecap_max_iotlb_offset(e) \ (ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16) #define ecap_coherent(e) ((e) & 0x1) +#define ecap_eim_support(e) ((e >> 4) & 0x1) +#define ecap_ir_support(e) ((e >> 3) & 0x1) /* IOTLB_REG */ diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c new file mode 100644 index 0000000..a80b879 --- /dev/null +++ b/drivers/pci/intr_remapping.c @@ -0,0 +1,70 @@ +#include +#include +#include "intel-iommu.h" +#include "intr_remapping.h" + +static struct ioapic_scope ir_ioapic[MAX_IO_APICS]; +static int ir_ioapic_num; + +static int ir_parse_ioapic_scope(struct acpi_dmar_header *header, + struct intel_iommu *iommu) +{ + struct acpi_dmar_hardware_unit *drhd; + struct acpi_dmar_device_scope *scope; + void *start, *end; + + drhd = (struct acpi_dmar_hardware_unit *)header; + + start = (void *)(drhd + 1); + end = ((void *)drhd) + header->length; + + while (start < end) { + scope = start; + if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_IOAPIC) { + if (ir_ioapic_num == MAX_IO_APICS) { + printk(KERN_WARNING "Exceeded Max IO APICS\n"); + return -1; + } + + printk(KERN_INFO "IOAPIC id %d under DRHD base" + " 0x%Lx\n", scope->enumeration_id, + drhd->address); + + ir_ioapic[ir_ioapic_num].iommu = iommu; + ir_ioapic[ir_ioapic_num].id = scope->enumeration_id; + ir_ioapic_num++; + } + start += scope->length; + } + + return 0; +} + +/* + * Finds the assocaition between IOAPIC's and its Interrupt-remapping + * hardware unit. + */ +int __init parse_ioapics_under_ir(void) +{ + struct dmar_drhd_unit *drhd; + int ir_supported = 0; + + for_each_drhd_unit(drhd) { + struct intel_iommu *iommu = drhd->iommu; + + if (ecap_ir_support(iommu->ecap)) { + if (ir_parse_ioapic_scope(drhd->hdr, iommu)) + return -1; + + ir_supported = 1; + } + } + + if (ir_supported && ir_ioapic_num != nr_ioapics) { + printk(KERN_WARNING + "Not all IO-APIC's listed under remapping hardware\n"); + return -1; + } + + return ir_supported; +} diff --git a/drivers/pci/intr_remapping.h b/drivers/pci/intr_remapping.h new file mode 100644 index 0000000..c4a40b2 --- /dev/null +++ b/drivers/pci/intr_remapping.h @@ -0,0 +1,6 @@ +#include "intel-iommu.h" + +struct ioapic_scope { + struct intel_iommu *iommu; + unsigned int id; +}; diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 3ab07e4..c4e96eb 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -47,6 +47,7 @@ extern int intel_iommu_init(void); extern int dmar_table_init(void); extern int early_dmar_detect(void); extern int dmar_dev_scope_init(void); +extern int parse_ioapics_under_ir(void); extern struct list_head dmar_drhd_units; extern struct list_head dmar_rmrr_units; -- cgit v0.10.2 From cf1337f0447e5be8e66daa944f0ea3bcac2b6179 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:41 -0700 Subject: x64, x2apic/intr-remap: move IOMMU_WAIT_OP() macro to intel-iommu.h move IOMMU_WAIT_OP() macro to header file. This will be used by both DMA-remapping and Intr-remapping. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 218a1f3..fb701d9 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -49,8 +49,6 @@ #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48 -#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) /* 10sec */ - #define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1) @@ -486,19 +484,6 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu) return 0; } -#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \ -{\ - cycles_t start_time = get_cycles();\ - while (1) {\ - sts = op (iommu->reg + offset);\ - if (cond)\ - break;\ - if (DMAR_OPERATION_TIMEOUT < (get_cycles() - start_time))\ - panic("DMAR hardware is malfunctioning\n");\ - cpu_relax();\ - }\ -} - static void iommu_set_root_entry(struct intel_iommu *iommu) { void *addr; diff --git a/drivers/pci/intel-iommu.h b/drivers/pci/intel-iommu.h index eb167e3..3a650e8 100644 --- a/drivers/pci/intel-iommu.h +++ b/drivers/pci/intel-iommu.h @@ -177,6 +177,21 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) #define dma_frcd_source_id(c) (c & 0xffff) #define dma_frcd_page_addr(d) (d & (((u64)-1) << 12)) /* low 64 bit */ +#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) /* 10sec */ + +#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \ +{\ + cycles_t start_time = get_cycles();\ + while (1) {\ + sts = op (iommu->reg + offset);\ + if (cond)\ + break;\ + if (DMAR_OPERATION_TIMEOUT < (get_cycles() - start_time))\ + panic("DMAR hardware is malfunctioning\n");\ + cpu_relax();\ + }\ +} + struct intel_iommu { void __iomem *reg; /* Pointer to hardware regs, virtual addr */ u64 cap; -- cgit v0.10.2 From fe962e90cb17a8426e144dee970e77ed789d98ee Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:42 -0700 Subject: x64, x2apic/intr-remap: Queued invalidation infrastructure (part of VT-d) Queued invalidation (part of Intel Virtualization Technology for Directed I/O architecture) infrastructure. This will be used for invalidating the interrupt entry cache in the case of Interrupt-remapping and IOTLB invalidation in the case of DMA-remapping. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 127764c..aba151c 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -28,6 +28,7 @@ #include #include +#include #include "iova.h" #include "intel-iommu.h" @@ -509,3 +510,152 @@ void free_iommu(struct intel_iommu *iommu) iounmap(iommu->reg); kfree(iommu); } + +/* + * Reclaim all the submitted descriptors which have completed its work. + */ +static inline void reclaim_free_desc(struct q_inval *qi) +{ + while (qi->desc_status[qi->free_tail] == QI_DONE) { + qi->desc_status[qi->free_tail] = QI_FREE; + qi->free_tail = (qi->free_tail + 1) % QI_LENGTH; + qi->free_cnt++; + } +} + +/* + * Submit the queued invalidation descriptor to the remapping + * hardware unit and wait for its completion. + */ +void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu) +{ + struct q_inval *qi = iommu->qi; + struct qi_desc *hw, wait_desc; + int wait_index, index; + unsigned long flags; + + if (!qi) + return; + + hw = qi->desc; + + spin_lock(&qi->q_lock); + while (qi->free_cnt < 3) { + spin_unlock(&qi->q_lock); + cpu_relax(); + spin_lock(&qi->q_lock); + } + + index = qi->free_head; + wait_index = (index + 1) % QI_LENGTH; + + qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE; + + hw[index] = *desc; + + wait_desc.low = QI_IWD_STATUS_DATA(2) | QI_IWD_STATUS_WRITE | QI_IWD_TYPE; + wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]); + + hw[wait_index] = wait_desc; + + __iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc)); + __iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc)); + + qi->free_head = (qi->free_head + 2) % QI_LENGTH; + qi->free_cnt -= 2; + + spin_lock_irqsave(&iommu->register_lock, flags); + /* + * update the HW tail register indicating the presence of + * new descriptors. + */ + writel(qi->free_head << 4, iommu->reg + DMAR_IQT_REG); + spin_unlock_irqrestore(&iommu->register_lock, flags); + + while (qi->desc_status[wait_index] != QI_DONE) { + spin_unlock(&qi->q_lock); + cpu_relax(); + spin_lock(&qi->q_lock); + } + + qi->desc_status[index] = QI_DONE; + + reclaim_free_desc(qi); + spin_unlock(&qi->q_lock); +} + +/* + * Flush the global interrupt entry cache. + */ +void qi_global_iec(struct intel_iommu *iommu) +{ + struct qi_desc desc; + + desc.low = QI_IEC_TYPE; + desc.high = 0; + + qi_submit_sync(&desc, iommu); +} + +/* + * Enable Queued Invalidation interface. This is a must to support + * interrupt-remapping. Also used by DMA-remapping, which replaces + * register based IOTLB invalidation. + */ +int dmar_enable_qi(struct intel_iommu *iommu) +{ + u32 cmd, sts; + unsigned long flags; + struct q_inval *qi; + + if (!ecap_qis(iommu->ecap)) + return -ENOENT; + + /* + * queued invalidation is already setup and enabled. + */ + if (iommu->qi) + return 0; + + iommu->qi = kmalloc(sizeof(*qi), GFP_KERNEL); + if (!iommu->qi) + return -ENOMEM; + + qi = iommu->qi; + + qi->desc = (void *)(get_zeroed_page(GFP_KERNEL)); + if (!qi->desc) { + kfree(qi); + iommu->qi = 0; + return -ENOMEM; + } + + qi->desc_status = kmalloc(QI_LENGTH * sizeof(int), GFP_KERNEL); + if (!qi->desc_status) { + free_page((unsigned long) qi->desc); + kfree(qi); + iommu->qi = 0; + return -ENOMEM; + } + + qi->free_head = qi->free_tail = 0; + qi->free_cnt = QI_LENGTH; + + spin_lock_init(&qi->q_lock); + + spin_lock_irqsave(&iommu->register_lock, flags); + /* write zero to the tail reg */ + writel(0, iommu->reg + DMAR_IQT_REG); + + dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc)); + + cmd = iommu->gcmd | DMA_GCMD_QIE; + iommu->gcmd |= DMA_GCMD_QIE; + writel(cmd, iommu->reg + DMAR_GCMD_REG); + + /* Make sure hardware complete it */ + IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts); + spin_unlock_irqrestore(&iommu->register_lock, flags); + + return 0; +} diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index fb701d9..347bf2e 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -181,13 +181,6 @@ void free_iova_mem(struct iova *iova) kmem_cache_free(iommu_iova_cache, iova); } -static inline void __iommu_flush_cache( - struct intel_iommu *iommu, void *addr, int size) -{ - if (!ecap_coherent(iommu->ecap)) - clflush_cache_range(addr, size); -} - /* Gets context entry for a given bus and devfn */ static struct context_entry * device_to_context_entry(struct intel_iommu *iommu, u8 bus, u8 devfn) diff --git a/drivers/pci/intel-iommu.h b/drivers/pci/intel-iommu.h index 3a650e8..2983ce8 100644 --- a/drivers/pci/intel-iommu.h +++ b/drivers/pci/intel-iommu.h @@ -27,6 +27,7 @@ #include #include "iova.h" #include +#include #include "dma_remapping.h" /* @@ -51,6 +52,10 @@ #define DMAR_PLMLIMIT_REG 0x6c /* PMRR low limit */ #define DMAR_PHMBASE_REG 0x70 /* pmrr high base addr */ #define DMAR_PHMLIMIT_REG 0x78 /* pmrr high limit */ +#define DMAR_IQH_REG 0x80 /* Invalidation queue head register */ +#define DMAR_IQT_REG 0x88 /* Invalidation queue tail register */ +#define DMAR_IQA_REG 0x90 /* Invalidation queue addr register */ +#define DMAR_ICS_REG 0x98 /* Invalidation complete status register */ #define OFFSET_STRIDE (9) /* @@ -114,6 +119,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) #define ecap_max_iotlb_offset(e) \ (ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16) #define ecap_coherent(e) ((e) & 0x1) +#define ecap_qis(e) ((e) & 0x2) #define ecap_eim_support(e) ((e >> 4) & 0x1) #define ecap_ir_support(e) ((e >> 3) & 0x1) @@ -131,6 +137,17 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) #define DMA_TLB_IH_NONLEAF (((u64)1) << 6) #define DMA_TLB_MAX_SIZE (0x3f) +/* INVALID_DESC */ +#define DMA_ID_TLB_GLOBAL_FLUSH (((u64)1) << 3) +#define DMA_ID_TLB_DSI_FLUSH (((u64)2) << 3) +#define DMA_ID_TLB_PSI_FLUSH (((u64)3) << 3) +#define DMA_ID_TLB_READ_DRAIN (((u64)1) << 7) +#define DMA_ID_TLB_WRITE_DRAIN (((u64)1) << 6) +#define DMA_ID_TLB_DID(id) (((u64)((id & 0xffff) << 16))) +#define DMA_ID_TLB_IH_NONLEAF (((u64)1) << 6) +#define DMA_ID_TLB_ADDR(addr) (addr) +#define DMA_ID_TLB_ADDR_MASK(mask) (mask) + /* PMEN_REG */ #define DMA_PMEN_EPM (((u32)1)<<31) #define DMA_PMEN_PRS (((u32)1)<<0) @@ -140,6 +157,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) #define DMA_GCMD_SRTP (((u32)1) << 30) #define DMA_GCMD_SFL (((u32)1) << 29) #define DMA_GCMD_EAFL (((u32)1) << 28) +#define DMA_GCMD_QIE (((u32)1) << 26) #define DMA_GCMD_WBF (((u32)1) << 27) /* GSTS_REG */ @@ -147,6 +165,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) #define DMA_GSTS_RTPS (((u32)1) << 30) #define DMA_GSTS_FLS (((u32)1) << 29) #define DMA_GSTS_AFLS (((u32)1) << 28) +#define DMA_GSTS_QIES (((u32)1) << 26) #define DMA_GSTS_WBFS (((u32)1) << 27) /* CCMD_REG */ @@ -192,6 +211,40 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) }\ } +#define QI_LENGTH 256 /* queue length */ + +enum { + QI_FREE, + QI_IN_USE, + QI_DONE +}; + +#define QI_CC_TYPE 0x1 +#define QI_IOTLB_TYPE 0x2 +#define QI_DIOTLB_TYPE 0x3 +#define QI_IEC_TYPE 0x4 +#define QI_IWD_TYPE 0x5 + +#define QI_IEC_SELECTIVE (((u64)1) << 4) +#define QI_IEC_IIDEX(idx) (((u64)(idx & 0xffff) << 32)) +#define QI_IEC_IM(m) (((u64)(m & 0x1f) << 27)) + +#define QI_IWD_STATUS_DATA(d) (((u64)d) << 32) +#define QI_IWD_STATUS_WRITE (((u64)1) << 5) + +struct qi_desc { + u64 low, high; +}; + +struct q_inval { + spinlock_t q_lock; + struct qi_desc *desc; /* invalidation queue */ + int *desc_status; /* desc status */ + int free_head; /* first free entry */ + int free_tail; /* last free entry */ + int free_cnt; +}; + struct intel_iommu { void __iomem *reg; /* Pointer to hardware regs, virtual addr */ u64 cap; @@ -212,8 +265,16 @@ struct intel_iommu { struct msi_msg saved_msg; struct sys_device sysdev; #endif + struct q_inval *qi; /* Queued invalidation info */ }; +static inline void __iommu_flush_cache( + struct intel_iommu *iommu, void *addr, int size) +{ + if (!ecap_coherent(iommu->ecap)) + clflush_cache_range(addr, size); +} + extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev); extern int alloc_iommu(struct dmar_drhd_unit *drhd); -- cgit v0.10.2 From 2ae21010694e56461a63bfc80e960090ce0a5ed9 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:43 -0700 Subject: x64, x2apic/intr-remap: Interrupt remapping infrastructure Interrupt remapping (part of Intel Virtualization Tech for directed I/O) infrastructure. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar diff --git a/drivers/pci/dma_remapping.h b/drivers/pci/dma_remapping.h index 05aac8e..bff5c65 100644 --- a/drivers/pci/dma_remapping.h +++ b/drivers/pci/dma_remapping.h @@ -145,6 +145,8 @@ struct device_domain_info { extern int init_dmars(void); extern void free_dmar_iommu(struct intel_iommu *iommu); +extern int dmar_disabled; + #ifndef CONFIG_DMAR_GFX_WA static inline void iommu_prepare_gfx_mapping(void) { diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index aba151c..23a119e 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -449,6 +449,22 @@ int __init early_dmar_detect(void) return (ACPI_SUCCESS(status) ? 1 : 0); } +void __init detect_intel_iommu(void) +{ + int ret; + + ret = early_dmar_detect(); + +#ifdef CONFIG_DMAR + { + if (ret && !no_iommu && !iommu_detected && !swiotlb && + !dmar_disabled) + iommu_detected = 1; + } +#endif +} + + int alloc_iommu(struct dmar_drhd_unit *drhd) { struct intel_iommu *iommu; diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 347bf2e..ffccf23 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -76,7 +76,7 @@ static long list_size; static void domain_remove_dev_info(struct dmar_domain *domain); -static int dmar_disabled; +int dmar_disabled; static int __initdata dmar_map_gfx = 1; static int dmar_forcedac; static int intel_iommu_strict; @@ -2238,15 +2238,6 @@ static void __init iommu_exit_mempool(void) } -void __init detect_intel_iommu(void) -{ - if (swiotlb || no_iommu || iommu_detected || dmar_disabled) - return; - if (early_dmar_detect()) { - iommu_detected = 1; - } -} - static void __init init_no_remapping_devices(void) { struct dmar_drhd_unit *drhd; @@ -2293,15 +2284,19 @@ int __init intel_iommu_init(void) { int ret = 0; - if (no_iommu || swiotlb || dmar_disabled) - return -ENODEV; - if (dmar_table_init()) return -ENODEV; if (dmar_dev_scope_init()) return -ENODEV; + /* + * Check the need for DMA-remapping initialization now. + * Above initialization will also be used by Interrupt-remapping. + */ + if (no_iommu || swiotlb || dmar_disabled) + return -ENODEV; + iommu_init_mempool(); dmar_init_reserved_ranges(); diff --git a/drivers/pci/intel-iommu.h b/drivers/pci/intel-iommu.h index 2983ce8..a81a74e 100644 --- a/drivers/pci/intel-iommu.h +++ b/drivers/pci/intel-iommu.h @@ -56,6 +56,7 @@ #define DMAR_IQT_REG 0x88 /* Invalidation queue tail register */ #define DMAR_IQA_REG 0x90 /* Invalidation queue addr register */ #define DMAR_ICS_REG 0x98 /* Invalidation complete status register */ +#define DMAR_IRTA_REG 0xb8 /* Interrupt remapping table addr register */ #define OFFSET_STRIDE (9) /* @@ -157,16 +158,20 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) #define DMA_GCMD_SRTP (((u32)1) << 30) #define DMA_GCMD_SFL (((u32)1) << 29) #define DMA_GCMD_EAFL (((u32)1) << 28) -#define DMA_GCMD_QIE (((u32)1) << 26) #define DMA_GCMD_WBF (((u32)1) << 27) +#define DMA_GCMD_QIE (((u32)1) << 26) +#define DMA_GCMD_SIRTP (((u32)1) << 24) +#define DMA_GCMD_IRE (((u32) 1) << 25) /* GSTS_REG */ #define DMA_GSTS_TES (((u32)1) << 31) #define DMA_GSTS_RTPS (((u32)1) << 30) #define DMA_GSTS_FLS (((u32)1) << 29) #define DMA_GSTS_AFLS (((u32)1) << 28) -#define DMA_GSTS_QIES (((u32)1) << 26) #define DMA_GSTS_WBFS (((u32)1) << 27) +#define DMA_GSTS_QIES (((u32)1) << 26) +#define DMA_GSTS_IRTPS (((u32)1) << 24) +#define DMA_GSTS_IRES (((u32)1) << 25) /* CCMD_REG */ #define DMA_CCMD_ICC (((u64)1) << 63) @@ -245,6 +250,16 @@ struct q_inval { int free_cnt; }; +#ifdef CONFIG_INTR_REMAP +/* 1MB - maximum possible interrupt remapping table size */ +#define INTR_REMAP_PAGE_ORDER 8 +#define INTR_REMAP_TABLE_REG_SIZE 0xf + +struct ir_table { + struct irte *base; +}; +#endif + struct intel_iommu { void __iomem *reg; /* Pointer to hardware regs, virtual addr */ u64 cap; @@ -266,6 +281,9 @@ struct intel_iommu { struct sys_device sysdev; #endif struct q_inval *qi; /* Queued invalidation info */ +#ifdef CONFIG_INTR_REMAP + struct ir_table *ir_table; /* Interrupt remapping info */ +#endif }; static inline void __iommu_flush_cache( @@ -279,5 +297,7 @@ extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev); extern int alloc_iommu(struct dmar_drhd_unit *drhd); extern void free_iommu(struct intel_iommu *iommu); +extern int dmar_enable_qi(struct intel_iommu *iommu); +extern void qi_global_iec(struct intel_iommu *iommu); #endif diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index a80b879..3d10cdc 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -1,10 +1,147 @@ #include +#include +#include +#include #include #include "intel-iommu.h" #include "intr_remapping.h" static struct ioapic_scope ir_ioapic[MAX_IO_APICS]; static int ir_ioapic_num; +int intr_remapping_enabled; + +static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode) +{ + u64 addr; + u32 cmd, sts; + unsigned long flags; + + addr = virt_to_phys((void *)iommu->ir_table->base); + + spin_lock_irqsave(&iommu->register_lock, flags); + + dmar_writeq(iommu->reg + DMAR_IRTA_REG, + (addr) | IR_X2APIC_MODE(mode) | INTR_REMAP_TABLE_REG_SIZE); + + /* Set interrupt-remapping table pointer */ + cmd = iommu->gcmd | DMA_GCMD_SIRTP; + writel(cmd, iommu->reg + DMAR_GCMD_REG); + + IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, + readl, (sts & DMA_GSTS_IRTPS), sts); + spin_unlock_irqrestore(&iommu->register_lock, flags); + + /* + * global invalidation of interrupt entry cache before enabling + * interrupt-remapping. + */ + qi_global_iec(iommu); + + spin_lock_irqsave(&iommu->register_lock, flags); + + /* Enable interrupt-remapping */ + cmd = iommu->gcmd | DMA_GCMD_IRE; + iommu->gcmd |= DMA_GCMD_IRE; + writel(cmd, iommu->reg + DMAR_GCMD_REG); + + IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, + readl, (sts & DMA_GSTS_IRES), sts); + + spin_unlock_irqrestore(&iommu->register_lock, flags); +} + + +static int setup_intr_remapping(struct intel_iommu *iommu, int mode) +{ + struct ir_table *ir_table; + struct page *pages; + + ir_table = iommu->ir_table = kzalloc(sizeof(struct ir_table), + GFP_KERNEL); + + if (!iommu->ir_table) + return -ENOMEM; + + pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, INTR_REMAP_PAGE_ORDER); + + if (!pages) { + printk(KERN_ERR "failed to allocate pages of order %d\n", + INTR_REMAP_PAGE_ORDER); + kfree(iommu->ir_table); + return -ENOMEM; + } + + ir_table->base = page_address(pages); + + iommu_set_intr_remapping(iommu, mode); + return 0; +} + +int __init enable_intr_remapping(int eim) +{ + struct dmar_drhd_unit *drhd; + int setup = 0; + + /* + * check for the Interrupt-remapping support + */ + for_each_drhd_unit(drhd) { + struct intel_iommu *iommu = drhd->iommu; + + if (!ecap_ir_support(iommu->ecap)) + continue; + + if (eim && !ecap_eim_support(iommu->ecap)) { + printk(KERN_INFO "DRHD %Lx: EIM not supported by DRHD, " + " ecap %Lx\n", drhd->reg_base_addr, iommu->ecap); + return -1; + } + } + + /* + * Enable queued invalidation for all the DRHD's. + */ + for_each_drhd_unit(drhd) { + int ret; + struct intel_iommu *iommu = drhd->iommu; + ret = dmar_enable_qi(iommu); + + if (ret) { + printk(KERN_ERR "DRHD %Lx: failed to enable queued, " + " invalidation, ecap %Lx, ret %d\n", + drhd->reg_base_addr, iommu->ecap, ret); + return -1; + } + } + + /* + * Setup Interrupt-remapping for all the DRHD's now. + */ + for_each_drhd_unit(drhd) { + struct intel_iommu *iommu = drhd->iommu; + + if (!ecap_ir_support(iommu->ecap)) + continue; + + if (setup_intr_remapping(iommu, eim)) + goto error; + + setup = 1; + } + + if (!setup) + goto error; + + intr_remapping_enabled = 1; + + return 0; + +error: + /* + * handle error condition gracefully here! + */ + return -1; +} static int ir_parse_ioapic_scope(struct acpi_dmar_header *header, struct intel_iommu *iommu) diff --git a/drivers/pci/intr_remapping.h b/drivers/pci/intr_remapping.h index c4a40b2..05f2635 100644 --- a/drivers/pci/intr_remapping.h +++ b/drivers/pci/intr_remapping.h @@ -4,3 +4,5 @@ struct ioapic_scope { struct intel_iommu *iommu; unsigned int id; }; + +#define IR_X2APIC_MODE(mode) (mode ? (1 << 11) : 0) diff --git a/include/linux/dmar.h b/include/linux/dmar.h index c4e96eb..8a0238d 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -25,9 +25,85 @@ #include #include -#ifdef CONFIG_DMAR +#if defined(CONFIG_DMAR) || defined(CONFIG_INTR_REMAP) struct intel_iommu; +struct dmar_drhd_unit { + struct list_head list; /* list of drhd units */ + struct acpi_dmar_header *hdr; /* ACPI header */ + u64 reg_base_addr; /* register base address*/ + struct pci_dev **devices; /* target device array */ + int devices_cnt; /* target device count */ + u8 ignored:1; /* ignore drhd */ + u8 include_all:1; + struct intel_iommu *iommu; +}; + +extern struct list_head dmar_drhd_units; + +#define for_each_drhd_unit(drhd) \ + list_for_each_entry(drhd, &dmar_drhd_units, list) + +extern int dmar_table_init(void); +extern int early_dmar_detect(void); +extern int dmar_dev_scope_init(void); + +/* Intel IOMMU detection */ +extern void detect_intel_iommu(void); + + +extern int parse_ioapics_under_ir(void); +extern int alloc_iommu(struct dmar_drhd_unit *); +#else +static inline void detect_intel_iommu(void) +{ + return; +} + +static inline int dmar_table_init(void) +{ + return -ENODEV; +} +#endif /* !CONFIG_DMAR && !CONFIG_INTR_REMAP */ + +#ifdef CONFIG_INTR_REMAP +extern int intr_remapping_enabled; +extern int enable_intr_remapping(int); + +struct irte { + union { + struct { + __u64 present : 1, + fpd : 1, + dst_mode : 1, + redir_hint : 1, + trigger_mode : 1, + dlvry_mode : 3, + avail : 4, + __reserved_1 : 4, + vector : 8, + __reserved_2 : 8, + dest_id : 32; + }; + __u64 low; + }; + + union { + struct { + __u64 sid : 16, + sq : 2, + svt : 2, + __reserved_3 : 44; + }; + __u64 high; + }; +}; +#else +#define enable_intr_remapping(mode) (-1) +#define intr_remapping_enabled (0) +#endif + +#ifdef CONFIG_DMAR extern const char *dmar_get_fault_reason(u8 fault_reason); /* Can't use the common MSI interrupt functions @@ -40,29 +116,8 @@ extern void dmar_msi_write(int irq, struct msi_msg *msg); extern int dmar_set_interrupt(struct intel_iommu *iommu); extern int arch_setup_dmar_msi(unsigned int irq); -/* Intel IOMMU detection and initialization functions */ -extern void detect_intel_iommu(void); -extern int intel_iommu_init(void); - -extern int dmar_table_init(void); -extern int early_dmar_detect(void); -extern int dmar_dev_scope_init(void); -extern int parse_ioapics_under_ir(void); - -extern struct list_head dmar_drhd_units; +extern int iommu_detected, no_iommu; extern struct list_head dmar_rmrr_units; - -struct dmar_drhd_unit { - struct list_head list; /* list of drhd units */ - struct acpi_dmar_header *hdr; /* ACPI header */ - u64 reg_base_addr; /* register base address*/ - struct pci_dev **devices; /* target device array */ - int devices_cnt; /* target device count */ - u8 ignored:1; /* ignore drhd */ - u8 include_all:1; - struct intel_iommu *iommu; -}; - struct dmar_rmrr_unit { struct list_head list; /* list of rmrr units */ struct acpi_dmar_header *hdr; /* ACPI header */ @@ -72,24 +127,19 @@ struct dmar_rmrr_unit { int devices_cnt; /* target device count */ }; -#define for_each_drhd_unit(drhd) \ - list_for_each_entry(drhd, &dmar_drhd_units, list) #define for_each_rmrr_units(rmrr) \ list_for_each_entry(rmrr, &dmar_rmrr_units, list) - -extern int alloc_iommu(struct dmar_drhd_unit *); +/* Intel DMAR initialization functions */ +extern int intel_iommu_init(void); +extern int dmar_disabled; #else -static inline void detect_intel_iommu(void) -{ - return; -} static inline int intel_iommu_init(void) { +#ifdef CONFIG_INTR_REMAP + return dmar_dev_scope_init(); +#else return -ENODEV; -} -static inline int dmar_table_init(void) -{ - return -ENODEV; +#endif } #endif /* !CONFIG_DMAR */ #endif /* __DMAR_H__ */ -- cgit v0.10.2 From b6fcb33ad6c05f152a672f7c96c1fab006527b80 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:44 -0700 Subject: x64, x2apic/intr-remap: routines managing Interrupt remapping table entries. Routines handling the management of interrupt remapping table entries. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar diff --git a/drivers/pci/intel-iommu.h b/drivers/pci/intel-iommu.h index a81a74e..2142c01 100644 --- a/drivers/pci/intel-iommu.h +++ b/drivers/pci/intel-iommu.h @@ -123,6 +123,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) #define ecap_qis(e) ((e) & 0x2) #define ecap_eim_support(e) ((e >> 4) & 0x1) #define ecap_ir_support(e) ((e >> 3) & 0x1) +#define ecap_max_handle_mask(e) ((e >> 20) & 0xf) /* IOTLB_REG */ @@ -255,6 +256,8 @@ struct q_inval { #define INTR_REMAP_PAGE_ORDER 8 #define INTR_REMAP_TABLE_REG_SIZE 0xf +#define INTR_REMAP_TABLE_ENTRIES 65536 + struct ir_table { struct irte *base; }; @@ -300,4 +303,5 @@ extern void free_iommu(struct intel_iommu *iommu); extern int dmar_enable_qi(struct intel_iommu *iommu); extern void qi_global_iec(struct intel_iommu *iommu); +extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); #endif diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index 3d10cdc..bddb4b1 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include "intel-iommu.h" #include "intr_remapping.h" @@ -10,6 +11,248 @@ static struct ioapic_scope ir_ioapic[MAX_IO_APICS]; static int ir_ioapic_num; int intr_remapping_enabled; +static struct { + struct intel_iommu *iommu; + u16 irte_index; + u16 sub_handle; + u8 irte_mask; +} irq_2_iommu[NR_IRQS]; + +static DEFINE_SPINLOCK(irq_2_ir_lock); + +int irq_remapped(int irq) +{ + if (irq > NR_IRQS) + return 0; + + if (!irq_2_iommu[irq].iommu) + return 0; + + return 1; +} + +int get_irte(int irq, struct irte *entry) +{ + int index; + + if (!entry || irq > NR_IRQS) + return -1; + + spin_lock(&irq_2_ir_lock); + if (!irq_2_iommu[irq].iommu) { + spin_unlock(&irq_2_ir_lock); + return -1; + } + + index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle; + *entry = *(irq_2_iommu[irq].iommu->ir_table->base + index); + + spin_unlock(&irq_2_ir_lock); + return 0; +} + +int alloc_irte(struct intel_iommu *iommu, int irq, u16 count) +{ + struct ir_table *table = iommu->ir_table; + u16 index, start_index; + unsigned int mask = 0; + int i; + + if (!count) + return -1; + + /* + * start the IRTE search from index 0. + */ + index = start_index = 0; + + if (count > 1) { + count = __roundup_pow_of_two(count); + mask = ilog2(count); + } + + if (mask > ecap_max_handle_mask(iommu->ecap)) { + printk(KERN_ERR + "Requested mask %x exceeds the max invalidation handle" + " mask value %Lx\n", mask, + ecap_max_handle_mask(iommu->ecap)); + return -1; + } + + spin_lock(&irq_2_ir_lock); + do { + for (i = index; i < index + count; i++) + if (table->base[i].present) + break; + /* empty index found */ + if (i == index + count) + break; + + index = (index + count) % INTR_REMAP_TABLE_ENTRIES; + + if (index == start_index) { + spin_unlock(&irq_2_ir_lock); + printk(KERN_ERR "can't allocate an IRTE\n"); + return -1; + } + } while (1); + + for (i = index; i < index + count; i++) + table->base[i].present = 1; + + irq_2_iommu[irq].iommu = iommu; + irq_2_iommu[irq].irte_index = index; + irq_2_iommu[irq].sub_handle = 0; + irq_2_iommu[irq].irte_mask = mask; + + spin_unlock(&irq_2_ir_lock); + + return index; +} + +static void qi_flush_iec(struct intel_iommu *iommu, int index, int mask) +{ + struct qi_desc desc; + + desc.low = QI_IEC_IIDEX(index) | QI_IEC_TYPE | QI_IEC_IM(mask) + | QI_IEC_SELECTIVE; + desc.high = 0; + + qi_submit_sync(&desc, iommu); +} + +int map_irq_to_irte_handle(int irq, u16 *sub_handle) +{ + int index; + + spin_lock(&irq_2_ir_lock); + if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) { + spin_unlock(&irq_2_ir_lock); + return -1; + } + + *sub_handle = irq_2_iommu[irq].sub_handle; + index = irq_2_iommu[irq].irte_index; + spin_unlock(&irq_2_ir_lock); + return index; +} + +int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle) +{ + spin_lock(&irq_2_ir_lock); + if (irq >= NR_IRQS || irq_2_iommu[irq].iommu) { + spin_unlock(&irq_2_ir_lock); + return -1; + } + + irq_2_iommu[irq].iommu = iommu; + irq_2_iommu[irq].irte_index = index; + irq_2_iommu[irq].sub_handle = subhandle; + irq_2_iommu[irq].irte_mask = 0; + + spin_unlock(&irq_2_ir_lock); + + return 0; +} + +int clear_irte_irq(int irq, struct intel_iommu *iommu, u16 index) +{ + spin_lock(&irq_2_ir_lock); + if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) { + spin_unlock(&irq_2_ir_lock); + return -1; + } + + irq_2_iommu[irq].iommu = NULL; + irq_2_iommu[irq].irte_index = 0; + irq_2_iommu[irq].sub_handle = 0; + irq_2_iommu[irq].irte_mask = 0; + + spin_unlock(&irq_2_ir_lock); + + return 0; +} + +int modify_irte(int irq, struct irte *irte_modified) +{ + int index; + struct irte *irte; + struct intel_iommu *iommu; + + spin_lock(&irq_2_ir_lock); + if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) { + spin_unlock(&irq_2_ir_lock); + return -1; + } + + iommu = irq_2_iommu[irq].iommu; + + index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle; + irte = &iommu->ir_table->base[index]; + + set_64bit((unsigned long *)irte, irte_modified->low | (1 << 1)); + __iommu_flush_cache(iommu, irte, sizeof(*irte)); + + qi_flush_iec(iommu, index, 0); + + spin_unlock(&irq_2_ir_lock); + return 0; +} + +int flush_irte(int irq) +{ + int index; + struct intel_iommu *iommu; + + spin_lock(&irq_2_ir_lock); + if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) { + spin_unlock(&irq_2_ir_lock); + return -1; + } + + iommu = irq_2_iommu[irq].iommu; + + index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle; + + qi_flush_iec(iommu, index, irq_2_iommu[irq].irte_mask); + spin_unlock(&irq_2_ir_lock); + + return 0; +} + +int free_irte(int irq) +{ + int index, i; + struct irte *irte; + struct intel_iommu *iommu; + + spin_lock(&irq_2_ir_lock); + if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) { + spin_unlock(&irq_2_ir_lock); + return -1; + } + + iommu = irq_2_iommu[irq].iommu; + + index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle; + irte = &iommu->ir_table->base[index]; + + if (!irq_2_iommu[irq].sub_handle) { + for (i = 0; i < (1 << irq_2_iommu[irq].irte_mask); i++) + set_64bit((unsigned long *)irte, 0); + qi_flush_iec(iommu, index, irq_2_iommu[irq].irte_mask); + } + + irq_2_iommu[irq].iommu = NULL; + irq_2_iommu[irq].irte_index = 0; + irq_2_iommu[irq].sub_handle = 0; + irq_2_iommu[irq].irte_mask = 0; + + spin_unlock(&irq_2_ir_lock); + + return 0; +} + static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode) { u64 addr; diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 8a0238d..324bbca 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -98,7 +98,19 @@ struct irte { __u64 high; }; }; +extern int get_irte(int irq, struct irte *entry); +extern int modify_irte(int irq, struct irte *irte_modified); +extern int alloc_irte(struct intel_iommu *iommu, int irq, u16 count); +extern int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, + u16 sub_handle); +extern int map_irq_to_irte_handle(int irq, u16 *sub_handle); +extern int clear_irte_irq(int irq, struct intel_iommu *iommu, u16 index); +extern int flush_irte(int irq); +extern int free_irte(int irq); + +extern int irq_remapped(int irq); #else +#define irq_remapped(irq) (0) #define enable_intr_remapping(mode) (-1) #define intr_remapping_enabled (0) #endif -- cgit v0.10.2 From 72b1e22dfcad1daca6906148fd956ffe404bb0bc Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:45 -0700 Subject: x64, x2apic/intr-remap: generic irq migration support from process context Generic infrastructure for migrating the irq from the process context in the presence of CONFIG_GENERIC_PENDING_IRQ. This will be used later for migrating irq in the presence of interrupt-remapping. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar diff --git a/include/linux/irq.h b/include/linux/irq.h index 552e0ec..c211984 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -62,6 +62,7 @@ typedef void (*irq_flow_handler_t)(unsigned int irq, #define IRQ_MOVE_PENDING 0x00200000 /* need to re-target IRQ destination */ #define IRQ_NO_BALANCING 0x00400000 /* IRQ is excluded from balancing */ #define IRQ_SPURIOUS_DISABLED 0x00800000 /* IRQ was disabled by the spurious trap */ +#define IRQ_MOVE_PCNTXT 0x01000000 /* IRQ migration from process context */ #ifdef CONFIG_IRQ_PER_CPU # define CHECK_IRQ_PER_CPU(var) ((var) & IRQ_PER_CPU) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 46d6611..628b557 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -87,7 +87,14 @@ int irq_set_affinity(unsigned int irq, cpumask_t cpumask) set_balance_irq_affinity(irq, cpumask); #ifdef CONFIG_GENERIC_PENDING_IRQ - set_pending_irq(irq, cpumask); + if (desc->status & IRQ_MOVE_PCNTXT) { + unsigned long flags; + + spin_lock_irqsave(&desc->lock, flags); + desc->chip->set_affinity(irq, cpumask); + spin_unlock_irqrestore(&desc->lock, flags); + } else + set_pending_irq(irq, cpumask); #else desc->affinity = cpumask; desc->chip->set_affinity(irq, cpumask); -- cgit v0.10.2 From d94d93ca5cc36cd78c532def62772c98fe8ba5d7 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:46 -0700 Subject: x64, x2apic/intr-remap: 8259 specific mask/unmask routines 8259 specific mask/unmask routines which be used later while enabling interrupt-remapping. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index dc92b49..4b8a53d 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -282,6 +282,30 @@ static int __init i8259A_init_sysfs(void) device_initcall(i8259A_init_sysfs); +void mask_8259A(void) +{ + unsigned long flags; + + spin_lock_irqsave(&i8259A_lock, flags); + + outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ + outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ + + spin_unlock_irqrestore(&i8259A_lock, flags); +} + +void unmask_8259A(void) +{ + unsigned long flags; + + spin_lock_irqsave(&i8259A_lock, flags); + + outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ + outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ + + spin_unlock_irqrestore(&i8259A_lock, flags); +} + void init_8259A(int auto_eoi) { unsigned long flags; diff --git a/include/asm-x86/i8259.h b/include/asm-x86/i8259.h index 2f98df9..31112b6 100644 --- a/include/asm-x86/i8259.h +++ b/include/asm-x86/i8259.h @@ -57,4 +57,7 @@ static inline void outb_pic(unsigned char value, unsigned int port) extern struct irq_chip i8259A_chip; +extern void mask_8259A(void); +extern void unmask_8259A(void); + #endif /* __ASM_I8259_H__ */ -- cgit v0.10.2 From 4dc2f96cacd1e74c688f94348a3bfd0a980817d5 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:47 -0700 Subject: x64, x2apic/intr-remap: ioapic routines which deal with initial io-apic RTE setup Generic ioapic specific routines which be used later during enabling interrupt-remapping. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 9e645cb..84dd63c 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -114,6 +114,9 @@ DEFINE_SPINLOCK(vector_lock); */ int nr_ioapic_registers[MAX_IO_APICS]; +/* I/O APIC RTE contents at the OS boot up */ +struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS]; + /* I/O APIC entries */ struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; int nr_ioapics; @@ -446,6 +449,69 @@ static void clear_IO_APIC (void) clear_IO_APIC_pin(apic, pin); } +/* + * Saves and masks all the unmasked IO-APIC RTE's + */ +int save_mask_IO_APIC_setup(void) +{ + union IO_APIC_reg_01 reg_01; + unsigned long flags; + int apic, pin; + + /* + * The number of IO-APIC IRQ registers (== #pins): + */ + for (apic = 0; apic < nr_ioapics; apic++) { + spin_lock_irqsave(&ioapic_lock, flags); + reg_01.raw = io_apic_read(apic, 1); + spin_unlock_irqrestore(&ioapic_lock, flags); + nr_ioapic_registers[apic] = reg_01.bits.entries+1; + } + + for (apic = 0; apic < nr_ioapics; apic++) { + early_ioapic_entries[apic] = + kzalloc(sizeof(struct IO_APIC_route_entry) * + nr_ioapic_registers[apic], GFP_KERNEL); + if (!early_ioapic_entries[apic]) + return -ENOMEM; + } + + for (apic = 0; apic < nr_ioapics; apic++) + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + struct IO_APIC_route_entry entry; + + entry = early_ioapic_entries[apic][pin] = + ioapic_read_entry(apic, pin); + if (!entry.mask) { + entry.mask = 1; + ioapic_write_entry(apic, pin, entry); + } + } + return 0; +} + +void restore_IO_APIC_setup(void) +{ + int apic, pin; + + for (apic = 0; apic < nr_ioapics; apic++) + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) + ioapic_write_entry(apic, pin, + early_ioapic_entries[apic][pin]); +} + +void reinit_intr_remapped_IO_APIC(int intr_remapping) +{ + /* + * for now plain restore of previous settings. + * TBD: In the case of OS enabling interrupt-remapping, + * IO-APIC RTE's need to be setup to point to interrupt-remapping + * table entries. for now, do a plain restore, and wait for + * the setup_IO_APIC_irqs() to do proper initialization. + */ + restore_IO_APIC_setup(); +} + int skip_ioapic_setup; int ioapic_force; diff --git a/include/asm-x86/io_apic.h b/include/asm-x86/io_apic.h index 14f82bb..1c4a99d 100644 --- a/include/asm-x86/io_apic.h +++ b/include/asm-x86/io_apic.h @@ -183,6 +183,12 @@ extern int io_apic_set_pci_routing(int ioapic, int pin, int irq, extern int (*ioapic_renumber_irq)(int ioapic, int irq); extern void ioapic_init_mappings(void); +#ifdef CONFIG_X86_64 +extern int save_mask_IO_APIC_setup(void); +extern void restore_IO_APIC_setup(void); +extern void reinit_intr_remapped_IO_APIC(int); +#endif + #else /* !CONFIG_X86_IO_APIC */ #define io_apic_assign_pci_irqs 0 static const int timer_through_8259 = 0; -- cgit v0.10.2 From 0c81c746f9bdbfaafe64322d540c8b7b59c27314 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:48 -0700 Subject: x64, x2apic/intr-remap: introduce read_apic_id() to genapic routines Move the read_apic_id() to genapic routines. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 1e3d32e..9dd4ee4 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1096,6 +1096,11 @@ void __cpuinit generic_processor_info(int apicid, int version) cpu_set(cpu, cpu_present_map); } +int hard_smp_processor_id(void) +{ + return read_apic_id(); +} + /* * Power management */ diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 1fa8be5..7414871 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -79,17 +79,6 @@ int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) return 0; } -unsigned int read_apic_id(void) -{ - unsigned int id; - - WARN_ON(preemptible() && num_online_cpus() > 1); - id = apic_read(APIC_ID); - if (uv_system_type >= UV_X2APIC) - id |= __get_cpu_var(x2apic_extra_bits); - return id; -} - enum uv_system_type get_uv_system_type(void) { return uv_system_type; diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 1a9c688..400ed8d 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -15,9 +15,11 @@ #include #include #include +#include #include #include #include +#include static cpumask_t flat_target_cpus(void) { @@ -95,9 +97,17 @@ static void flat_send_IPI_all(int vector) __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); } +static unsigned int read_xapic_id(void) +{ + unsigned int id; + + id = GET_XAPIC_ID(apic_read(APIC_ID)); + return id; +} + static int flat_apic_id_registered(void) { - return physid_isset(GET_APIC_ID(read_apic_id()), phys_cpu_present_map); + return physid_isset(read_xapic_id(), phys_cpu_present_map); } static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask) @@ -123,6 +133,7 @@ struct genapic apic_flat = { .send_IPI_mask = flat_send_IPI_mask, .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, .phys_pkg_id = phys_pkg_id, + .read_apic_id = read_xapic_id, }; /* @@ -187,4 +198,5 @@ struct genapic apic_physflat = { .send_IPI_mask = physflat_send_IPI_mask, .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, .phys_pkg_id = phys_pkg_id, + .read_apic_id = read_xapic_id, }; diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 711f11c..1ef99be 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -134,9 +135,19 @@ static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask) return BAD_APICID; } +static unsigned int uv_read_apic_id(void) +{ + unsigned int id; + + WARN_ON(preemptible() && num_online_cpus() > 1); + id = apic_read(APIC_ID) | __get_cpu_var(x2apic_extra_bits); + + return id; +} + static unsigned int phys_pkg_id(int index_msb) { - return GET_APIC_ID(read_apic_id()) >> index_msb; + return uv_read_apic_id() >> index_msb; } #ifdef ZZZ /* Needs x2apic patch */ @@ -159,6 +170,7 @@ struct genapic apic_x2apic_uv_x = { /* ZZZ.send_IPI_self = uv_send_IPI_self, */ .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, .phys_pkg_id = phys_pkg_id, /* Fixme ZZZ */ + .read_apic_id = uv_read_apic_id, }; static __cpuinit void set_x2apic_extra_bits(int pnode) diff --git a/include/asm-x86/genapic_64.h b/include/asm-x86/genapic_64.h index 647e4e5..d567abc 100644 --- a/include/asm-x86/genapic_64.h +++ b/include/asm-x86/genapic_64.h @@ -27,6 +27,7 @@ struct genapic { /* */ unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask); unsigned int (*phys_pkg_id)(int index_msb); + unsigned int (*read_apic_id)(void); }; extern struct genapic *genapic; diff --git a/include/asm-x86/mach-default/mach_apic.h b/include/asm-x86/mach-default/mach_apic.h index 0b2cde5..d172c55 100644 --- a/include/asm-x86/mach-default/mach_apic.h +++ b/include/asm-x86/mach-default/mach_apic.h @@ -30,6 +30,7 @@ static inline cpumask_t target_cpus(void) #define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid) #define phys_pkg_id (genapic->phys_pkg_id) #define vector_allocation_domain (genapic->vector_allocation_domain) +#define read_apic_id (genapic->read_apic_id) extern void setup_apic_routing(void); #else #define INT_DELIVERY_MODE dest_LowestPrio diff --git a/include/asm-x86/mach-default/mach_apicdef.h b/include/asm-x86/mach-default/mach_apicdef.h index e4b29ba..453b58a 100644 --- a/include/asm-x86/mach-default/mach_apicdef.h +++ b/include/asm-x86/mach-default/mach_apicdef.h @@ -5,8 +5,9 @@ #ifdef CONFIG_X86_64 #define APIC_ID_MASK (0xFFu<<24) -#define GET_APIC_ID(x) (((x)>>24)&0xFFu) +#define GET_APIC_ID(x) (x) #define SET_APIC_ID(x) (((x)<<24)) +#define GET_XAPIC_ID(x) (((x) >> 24) & 0xFFu) #else #define APIC_ID_MASK (0xF<<24) static inline unsigned get_apic_id(unsigned long x) diff --git a/include/asm-x86/smp.h b/include/asm-x86/smp.h index 2e221f1..9848715 100644 --- a/include/asm-x86/smp.h +++ b/include/asm-x86/smp.h @@ -169,12 +169,10 @@ static inline unsigned int read_apic_id(void) { return *(u32 *)(APIC_BASE + APIC_ID); } -#else -extern unsigned int read_apic_id(void); #endif -# ifdef APIC_DEFINITION +# if defined(APIC_DEFINITION) || defined(CONFIG_X86_64) extern int hard_smp_processor_id(void); # else # include -- cgit v0.10.2 From 2d7a66d02e11af9ab8e16c76d22767e622b4e3d7 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Fri, 11 Jul 2008 14:24:19 -0700 Subject: x64, x2apic/intr-remap: Interrupt-remapping and x2apic support, fix Yinghai Lu wrote: > Setting APIC routing to physical flat > Kernel panic - not syncing: Boot APIC ID in local APIC unexpected (0 vs 4) > Pid: 1, comm: swapper Not tainted 2.6.26-rc9-tip-01763-g74f94b1-dirty #320 > > Call Trace: > [] ? set_cpu_sibling_map+0x38c/0x3bd > [] ? read_xapic_id+0x25/0x3e > [] ? verify_local_APIC+0x139/0x1b9 > [] ? read_xapic_id+0x25/0x3e > [] ? native_smp_prepare_cpus+0x224/0x2e9 > [] ? kernel_init+0x64/0x341 > [] ? child_rip+0xa/0x11 > [] ? kernel_init+0x0/0x341 > [] ? child_rip+0x0/0x11 > > > guess read_apic_id changing cuase some problem... genapic's read_apic_id() returns the actual apic id extracted from the APIC_ID register. And in some cases like UV, read_apic_id() returns completely different values from APIC ID register. Use the native apic register read, rather than genapic read_apic_id() in verify_local_APIC() And also, lapic_suspend() should also use native apic register read. Reported-by: Yinghai Lu Signed-off-by: Suresh Siddha Cc: "akpm@linux-foundation.org" Cc: "arjan@linux.intel.com" Cc: "andi@firstfloor.org" Cc: "ebiederm@xmission.com" Cc: "jbarnes@virtuousgeek.org" Cc: "steiner@sgi.com" Cc: "jeremy@goop.org" Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 9dd4ee4..3963f59 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -626,10 +626,10 @@ int __init verify_local_APIC(void) /* * The ID register is read/write in a real APIC. */ - reg0 = read_apic_id(); + reg0 = apic_read(APIC_ID); apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); apic_write(APIC_ID, reg0 ^ APIC_ID_MASK); - reg1 = read_apic_id(); + reg1 = apic_read(APIC_ID); apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1); apic_write(APIC_ID, reg0); if (reg1 != (reg0 ^ APIC_ID_MASK)) @@ -1137,7 +1137,7 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state) maxlvt = lapic_get_maxlvt(); - apic_pm_state.apic_id = read_apic_id(); + apic_pm_state.apic_id = apic_read(APIC_ID); apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); apic_pm_state.apic_ldr = apic_read(APIC_LDR); apic_pm_state.apic_dfr = apic_read(APIC_DFR); -- cgit v0.10.2 From 1b374e4d6f8b3eb2fcd034fcc24ea8ba1dfde7aa Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:49 -0700 Subject: x64, x2apic/intr-remap: basic apic ops support Introduce basic apic operations which handle the apic programming. This will be used later to introduce another specific operations for x2apic. For the perfomance critial accesses like IPI's, EOI etc, we use the native operations as they are already referenced by different indirections like genapic, irq_chip etc. 64bit Paravirt ops can also define their apic operations accordingly. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 3e58b67..2a83c07 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -145,6 +145,12 @@ static int modern_apic(void) return lapic_get_version() >= 0x14; } +void apic_icr_write(u32 low, u32 id) +{ + apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(id)); + apic_write_around(APIC_ICR, low); +} + void apic_wait_icr_idle(void) { while (apic_read(APIC_ICR) & APIC_ICR_BUSY) diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 3963f59..9bb0406 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -119,13 +119,13 @@ static int modern_apic(void) return lapic_get_version() >= 0x14; } -void apic_wait_icr_idle(void) +void xapic_wait_icr_idle(void) { while (apic_read(APIC_ICR) & APIC_ICR_BUSY) cpu_relax(); } -u32 safe_apic_wait_icr_idle(void) +u32 safe_xapic_wait_icr_idle(void) { u32 send_status; int timeout; @@ -141,6 +141,36 @@ u32 safe_apic_wait_icr_idle(void) return send_status; } +void xapic_icr_write(u32 low, u32 id) +{ + apic_write(APIC_ICR2, id << 24); + apic_write(APIC_ICR, low); +} + +u64 xapic_icr_read(void) +{ + u32 icr1, icr2; + + icr2 = apic_read(APIC_ICR2); + icr1 = apic_read(APIC_ICR); + + return (icr1 | ((u64)icr2 << 32)); +} + +static struct apic_ops xapic_ops = { + .read = native_apic_mem_read, + .write = native_apic_mem_write, + .write_atomic = native_apic_mem_write_atomic, + .icr_read = xapic_icr_read, + .icr_write = xapic_icr_write, + .wait_icr_idle = xapic_wait_icr_idle, + .safe_wait_icr_idle = safe_xapic_wait_icr_idle, +}; + +struct apic_ops __read_mostly *apic_ops = &xapic_ops; + +EXPORT_SYMBOL_GPL(apic_ops); + /** * enable_NMI_through_LVT0 - enable NMI through local vector table 0 */ diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 84dd63c..b62d42e 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -1157,6 +1157,7 @@ static __apicdebuginit void print_APIC_bitfield (int base) void __apicdebuginit print_local_APIC(void * dummy) { unsigned int v, ver, maxlvt; + unsigned long icr; if (apic_verbosity == APIC_QUIET) return; @@ -1200,10 +1201,9 @@ void __apicdebuginit print_local_APIC(void * dummy) v = apic_read(APIC_ESR); printk(KERN_DEBUG "... APIC ESR: %08x\n", v); - v = apic_read(APIC_ICR); - printk(KERN_DEBUG "... APIC ICR: %08x\n", v); - v = apic_read(APIC_ICR2); - printk(KERN_DEBUG "... APIC ICR2: %08x\n", v); + icr = apic_icr_read(); + printk(KERN_DEBUG "... APIC ICR: %08x\n", icr); + printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32); v = apic_read(APIC_LVTT); printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index e0f571d..b80105a 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -360,9 +360,11 @@ struct pv_cpu_ops pv_cpu_ops = { struct pv_apic_ops pv_apic_ops = { #ifdef CONFIG_X86_LOCAL_APIC - .apic_write = native_apic_write, - .apic_write_atomic = native_apic_write_atomic, - .apic_read = native_apic_read, +#ifnded CONFIG_X86_64 + .apic_write = native_apic_mem_write, + .apic_write_atomic = native_apic_mem_write_atomic, + .apic_read = native_apic_mem_read, +#endif .setup_boot_clock = setup_boot_APIC_clock, .setup_secondary_clock = setup_secondary_APIC_clock, .startup_ipi_hook = paravirt_nop, diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index f35c2d8..c55263b 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -123,7 +123,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_info); static atomic_t init_deasserted; -static int boot_cpu_logical_apicid; /* representing cpus for which sibling maps can be computed */ static cpumask_t cpu_sibling_setup_map; @@ -165,6 +164,8 @@ static void unmap_cpu_to_node(int cpu) #endif #ifdef CONFIG_X86_32 +static int boot_cpu_logical_apicid; + u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; @@ -546,8 +547,7 @@ static inline void __inquire_remote_apic(int apicid) printk(KERN_CONT "a previous APIC delivery may have failed\n"); - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); - apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]); + apic_icr_write(APIC_DM_REMRD | regs[i], apicid); timeout = 0; do { @@ -579,11 +579,9 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) int maxlvt; /* Target chip */ - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid)); - /* Boot on the stack */ /* Kick the second */ - apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL); + apic_icr_write(APIC_DM_NMI | APIC_DEST_LOGICAL, logical_apicid); Dprintk("Waiting for send to finish...\n"); send_status = safe_apic_wait_icr_idle(); @@ -639,13 +637,11 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) /* * Turn INIT on target chip */ - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); - /* * Send IPI */ - apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT - | APIC_DM_INIT); + apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT, + phys_apicid); Dprintk("Waiting for send to finish...\n"); send_status = safe_apic_wait_icr_idle(); @@ -655,10 +651,8 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) Dprintk("Deasserting INIT.\n"); /* Target chip */ - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); - /* Send IPI */ - apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT); + apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid); Dprintk("Waiting for send to finish...\n"); send_status = safe_apic_wait_icr_idle(); @@ -703,12 +697,10 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) */ /* Target chip */ - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); - /* Boot on the stack */ /* Kick the second */ - apic_write_around(APIC_ICR, APIC_DM_STARTUP - | (start_eip >> 12)); + apic_icr_write(APIC_DM_STARTUP | (start_eip >> 12), + phys_apicid); /* * Give the other CPU some time to accept the IPI. @@ -1147,7 +1139,9 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) * Setup boot CPU information */ smp_store_cpu_info(0); /* Final full version of the data */ +#ifdef CONFIG_X86_32 boot_cpu_logical_apicid = logical_smp_processor_id(); +#endif current_thread_info()->cpu = 0; /* needed? */ set_cpu_sibling_map(0); diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h index 4e2c1e5..6fda195 100644 --- a/include/asm-x86/apic.h +++ b/include/asm-x86/apic.h @@ -47,32 +47,59 @@ extern int disable_apic; #ifdef CONFIG_PARAVIRT #include #else -#define apic_write native_apic_write -#define apic_write_atomic native_apic_write_atomic -#define apic_read native_apic_read +#ifndef CONFIG_X86_64 +#define apic_write native_apic_mem_write +#define apic_write_atomic native_apic_mem_write_atomic +#define apic_read native_apic_mem_read +#endif #define setup_boot_clock setup_boot_APIC_clock #define setup_secondary_clock setup_secondary_APIC_clock #endif extern int is_vsmp_box(void); -static inline void native_apic_write(unsigned long reg, u32 v) +static inline void native_apic_mem_write(u32 reg, u32 v) { *((volatile u32 *)(APIC_BASE + reg)) = v; } -static inline void native_apic_write_atomic(unsigned long reg, u32 v) +static inline void native_apic_mem_write_atomic(u32 reg, u32 v) { (void)xchg((u32 *)(APIC_BASE + reg), v); } -static inline u32 native_apic_read(unsigned long reg) +static inline u32 native_apic_mem_read(u32 reg) { return *((volatile u32 *)(APIC_BASE + reg)); } +#ifdef CONFIG_X86_32 extern void apic_wait_icr_idle(void); extern u32 safe_apic_wait_icr_idle(void); +extern void apic_icr_write(u32 low, u32 id); +#else + +struct apic_ops { + u32 (*read)(u32 reg); + void (*write)(u32 reg, u32 v); + void (*write_atomic)(u32 reg, u32 v); + u64 (*icr_read)(void); + void (*icr_write)(u32 low, u32 high); + void (*wait_icr_idle)(void); + u32 (*safe_wait_icr_idle)(void); +}; + +extern struct apic_ops *apic_ops; + +#define apic_read (apic_ops->read) +#define apic_write (apic_ops->write) +#define apic_write_atomic (apic_ops->write_atomic) +#define apic_icr_read (apic_ops->icr_read) +#define apic_icr_write (apic_ops->icr_write) +#define apic_wait_icr_idle (apic_ops->wait_icr_idle) +#define safe_apic_wait_icr_idle (apic_ops->safe_wait_icr_idle) +#endif + extern int get_physical_broadcast(void); #ifdef CONFIG_X86_GOOD_APIC @@ -95,7 +122,11 @@ static inline void ack_APIC_irq(void) */ /* Docs say use 0 for future compatibility */ +#ifdef CONFIG_X86_32 apic_write_around(APIC_EOI, 0); +#else + native_apic_mem_write(APIC_EOI, 0); +#endif } extern int lapic_get_maxlvt(void); diff --git a/include/asm-x86/ipi.h b/include/asm-x86/ipi.h index 196d63c..3d8d6a6 100644 --- a/include/asm-x86/ipi.h +++ b/include/asm-x86/ipi.h @@ -49,6 +49,12 @@ static inline int __prepare_ICR2(unsigned int mask) return SET_APIC_DEST_FIELD(mask); } +static inline void __xapic_wait_icr_idle(void) +{ + while (native_apic_mem_read(APIC_ICR) & APIC_ICR_BUSY) + cpu_relax(); +} + static inline void __send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest) { @@ -64,7 +70,7 @@ static inline void __send_IPI_shortcut(unsigned int shortcut, int vector, /* * Wait for idle. */ - apic_wait_icr_idle(); + __xapic_wait_icr_idle(); /* * No need to touch the target chip field @@ -74,7 +80,7 @@ static inline void __send_IPI_shortcut(unsigned int shortcut, int vector, /* * Send the IPI. The write to APIC_ICR fires this off. */ - apic_write(APIC_ICR, cfg); + native_apic_mem_write(APIC_ICR, cfg); } /* @@ -92,13 +98,13 @@ static inline void __send_IPI_dest_field(unsigned int mask, int vector, if (unlikely(vector == NMI_VECTOR)) safe_apic_wait_icr_idle(); else - apic_wait_icr_idle(); + __xapic_wait_icr_idle(); /* * prepare target chip field */ cfg = __prepare_ICR2(mask); - apic_write(APIC_ICR2, cfg); + native_apic_mem_write(APIC_ICR2, cfg); /* * program the ICR @@ -108,7 +114,7 @@ static inline void __send_IPI_dest_field(unsigned int mask, int vector, /* * Send the IPI. The write to APIC_ICR fires this off. */ - apic_write(APIC_ICR, cfg); + native_apic_mem_write(APIC_ICR, cfg); } static inline void send_IPI_mask_sequence(cpumask_t mask, int vector) diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h index ef5e8ec..10adac0 100644 --- a/include/asm-x86/paravirt.h +++ b/include/asm-x86/paravirt.h @@ -891,6 +891,7 @@ static inline void slow_down_io(void) /* * Basic functions accessing APICs. */ +#ifndef CONFIG_X86_64 static inline void apic_write(unsigned long reg, u32 v) { PVOP_VCALL2(pv_apic_ops.apic_write, reg, v); @@ -905,6 +906,7 @@ static inline u32 apic_read(unsigned long reg) { return PVOP_CALL1(unsigned long, pv_apic_ops.apic_read, reg); } +#endif static inline void setup_boot_clock(void) { diff --git a/include/asm-x86/smp.h b/include/asm-x86/smp.h index 9848715..d9d007d 100644 --- a/include/asm-x86/smp.h +++ b/include/asm-x86/smp.h @@ -158,13 +158,13 @@ extern int safe_smp_processor_id(void); #ifdef CONFIG_X86_LOCAL_APIC +#ifndef CONFIG_X86_64 static inline int logical_smp_processor_id(void) { /* we don't want to mark this access volatile - bad code generation */ return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR)); } -#ifndef CONFIG_X86_64 static inline unsigned int read_apic_id(void) { return *(u32 *)(APIC_BASE + APIC_ID); -- cgit v0.10.2 From 32e1d0a0651004f5fe47f85a2a5c725ad579a90c Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:50 -0700 Subject: x64, x2apic/intr-remap: cpuid bits for x2apic feature cpuid feature for x2apic. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/feature_names.c b/arch/x86/kernel/cpu/feature_names.c index e43ad4a..0bf4d37 100644 --- a/arch/x86/kernel/cpu/feature_names.c +++ b/arch/x86/kernel/cpu/feature_names.c @@ -45,7 +45,7 @@ const char * const x86_cap_flags[NCAPINTS*32] = { /* Intel-defined (#2) */ "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, - NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt", + NULL, NULL, "dca", "sse4_1", "sse4_2", "x2apic", NULL, "popcnt", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* VIA/Cyrix/Centaur-defined */ diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h index 75ef959..5be9510 100644 --- a/include/asm-x86/cpufeature.h +++ b/include/asm-x86/cpufeature.h @@ -90,6 +90,7 @@ #define X86_FEATURE_CX16 (4*32+13) /* CMPXCHG16B */ #define X86_FEATURE_XTPR (4*32+14) /* Send Task Priority Messages */ #define X86_FEATURE_DCA (4*32+18) /* Direct Cache Access */ +#define X86_FEATURE_X2APIC (4*32+21) /* x2APIC */ /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ #define X86_FEATURE_XSTORE (5*32+ 2) /* on-CPU RNG present (xstore insn) */ @@ -188,6 +189,7 @@ extern const char * const x86_power_flags[32]; #define cpu_has_gbpages boot_cpu_has(X86_FEATURE_GBPAGES) #define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON) #define cpu_has_pat boot_cpu_has(X86_FEATURE_PAT) +#define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC) #if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64) # define cpu_has_invlpg 1 -- cgit v0.10.2 From 1cb11583a6c4ceda7426eb36f7bf0419da8dfbc2 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:51 -0700 Subject: x64, x2apic/intr-remap: disable DMA-remapping if Interrupt-remapping is detected (temporary quirk) Interrupt-remapping enables queued invalidation. And once queued invalidation is enabled, IOTLB invalidation also needs to use the queued invalidation mechanism and the register based IOTLB invalidation doesn't work. For now, Support for IOTLB invalidation using queued invalidation is missing. Meanwhile, disable DMA-remapping, if Interrupt-remapping support is detected. For the meanwhile, if someone wants to really enable DMA-remapping, they can use nox2apic, which will disable interrupt-remapping and as such doesn't enable queued invalidation. And given that none of the release platforms support intr-remapping yet, we should be ok for this temporary hack. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 23a119e..bd2c016 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -457,6 +457,31 @@ void __init detect_intel_iommu(void) #ifdef CONFIG_DMAR { + struct acpi_table_dmar *dmar; + /* + * for now we will disable dma-remapping when interrupt + * remapping is enabled. + * When support for queued invalidation for IOTLB invalidation + * is added, we will not need this any more. + */ + dmar = (struct acpi_table_dmar *) dmar_tbl; + if (ret && cpu_has_x2apic && dmar->flags & 0x1) { + printk(KERN_INFO + "Queued invalidation will be enabled to support " + "x2apic and Intr-remapping.\n"); + printk(KERN_INFO + "Disabling IOMMU detection, because of missing " + "queued invalidation support for IOTLB " + "invalidation\n"); + printk(KERN_INFO + "Use \"nox2apic\", if you want to use Intel " + " IOMMU for DMA-remapping and don't care about " + " x2apic support\n"); + + dmar_disabled = 1; + return; + } + if (ret && !no_iommu && !iommu_detected && !swiotlb && !dmar_disabled) iommu_detected = 1; -- cgit v0.10.2 From 13c88fb58d0112d47f7839f24a755715c6218822 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:52 -0700 Subject: x64, x2apic/intr-remap: x2apic ops for x2apic mode support x2apic ops for x2apic mode support. This uses MSR interface and differs slightly from the xapic register layout. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 9bb0406..a969ef7 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -171,6 +171,41 @@ struct apic_ops __read_mostly *apic_ops = &xapic_ops; EXPORT_SYMBOL_GPL(apic_ops); +static void x2apic_wait_icr_idle(void) +{ + /* no need to wait for icr idle in x2apic */ + return; +} + +static u32 safe_x2apic_wait_icr_idle(void) +{ + /* no need to wait for icr idle in x2apic */ + return 0; +} + +void x2apic_icr_write(u32 low, u32 id) +{ + wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low); +} + +u64 x2apic_icr_read(void) +{ + unsigned long val; + + rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val); + return val; +} + +static struct apic_ops x2apic_ops = { + .read = native_apic_msr_read, + .write = native_apic_msr_write, + .write_atomic = native_apic_msr_write, + .icr_read = x2apic_icr_read, + .icr_write = x2apic_icr_write, + .wait_icr_idle = x2apic_wait_icr_idle, + .safe_wait_icr_idle = safe_x2apic_wait_icr_idle, +}; + /** * enable_NMI_through_LVT0 - enable NMI through local vector table 0 */ diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h index 6fda195..bb54928 100644 --- a/include/asm-x86/apic.h +++ b/include/asm-x86/apic.h @@ -7,6 +7,8 @@ #include #include #include +#include +#include #define ARCH_APICTIMER_STOPS_ON_C3 1 @@ -73,6 +75,26 @@ static inline u32 native_apic_mem_read(u32 reg) return *((volatile u32 *)(APIC_BASE + reg)); } +static inline void native_apic_msr_write(u32 reg, u32 v) +{ + if (reg == APIC_DFR || reg == APIC_ID || reg == APIC_LDR || + reg == APIC_LVR) + return; + + wrmsr(APIC_BASE_MSR + (reg >> 4), v, 0); +} + +static inline u32 native_apic_msr_read(u32 reg) +{ + u32 low, high; + + if (reg == APIC_DFR) + return -1; + + rdmsr(APIC_BASE_MSR + (reg >> 4), low, high); + return low; +} + #ifdef CONFIG_X86_32 extern void apic_wait_icr_idle(void); extern u32 safe_apic_wait_icr_idle(void); diff --git a/include/asm-x86/apicdef.h b/include/asm-x86/apicdef.h index 6b9008c..bcae297 100644 --- a/include/asm-x86/apicdef.h +++ b/include/asm-x86/apicdef.h @@ -105,6 +105,7 @@ #define APIC_TMICT 0x380 #define APIC_TMCCT 0x390 #define APIC_TDCR 0x3E0 +#define APIC_SELF_IPI 0x3F0 #define APIC_TDR_DIV_TMBASE (1 << 2) #define APIC_TDR_DIV_1 0xB #define APIC_TDR_DIV_2 0x0 @@ -128,6 +129,8 @@ #define APIC_EILVT3 0x530 #define APIC_BASE (fix_to_virt(FIX_APIC_BASE)) +#define APIC_BASE_MSR 0x800 +#define X2APIC_ENABLE (1UL << 10) #ifdef CONFIG_X86_32 # define MAX_IO_APICS 64 -- cgit v0.10.2 From cff73a6ffaed726780b001937d2a42efde553922 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:53 -0700 Subject: x64, x2apic/intr-remap: introcude self IPI to genapic routines Introduce self IPI op for genapic. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 7414871..6657de6 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -61,7 +61,7 @@ void __init setup_apic_routing(void) /* Same for both flat and physical. */ -void send_IPI_self(int vector) +void apic_send_IPI_self(int vector) { __send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL); } diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 400ed8d..7355868 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -131,6 +131,7 @@ struct genapic apic_flat = { .send_IPI_all = flat_send_IPI_all, .send_IPI_allbutself = flat_send_IPI_allbutself, .send_IPI_mask = flat_send_IPI_mask, + .send_IPI_self = apic_send_IPI_self, .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, .phys_pkg_id = phys_pkg_id, .read_apic_id = read_xapic_id, @@ -196,6 +197,7 @@ struct genapic apic_physflat = { .send_IPI_all = physflat_send_IPI_all, .send_IPI_allbutself = physflat_send_IPI_allbutself, .send_IPI_mask = physflat_send_IPI_mask, + .send_IPI_self = apic_send_IPI_self, .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, .phys_pkg_id = phys_pkg_id, .read_apic_id = read_xapic_id, diff --git a/include/asm-x86/genapic_64.h b/include/asm-x86/genapic_64.h index d567abc..6777d71 100644 --- a/include/asm-x86/genapic_64.h +++ b/include/asm-x86/genapic_64.h @@ -24,6 +24,7 @@ struct genapic { void (*send_IPI_mask)(cpumask_t mask, int vector); void (*send_IPI_allbutself)(int vector); void (*send_IPI_all)(int vector); + void (*send_IPI_self)(int vector); /* */ unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask); unsigned int (*phys_pkg_id)(int index_msb); @@ -36,6 +37,7 @@ extern struct genapic apic_flat; extern struct genapic apic_physflat; extern int acpi_madt_oem_check(char *, char *); +extern void apic_send_IPI_self(int vector); enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; extern enum uv_system_type get_uv_system_type(void); extern int is_uv_system(void); diff --git a/include/asm-x86/hw_irq.h b/include/asm-x86/hw_irq.h index 18f067c..2ae47e7 100644 --- a/include/asm-x86/hw_irq.h +++ b/include/asm-x86/hw_irq.h @@ -72,7 +72,9 @@ extern void enable_IO_APIC(void); #endif /* IPI functions */ +#ifdef CONFIG_X86_32 extern void send_IPI_self(int vector); +#endif extern void send_IPI(int dest, int vector); /* Statistics */ diff --git a/include/asm-x86/mach-default/mach_apic.h b/include/asm-x86/mach-default/mach_apic.h index d172c55..e06d239 100644 --- a/include/asm-x86/mach-default/mach_apic.h +++ b/include/asm-x86/mach-default/mach_apic.h @@ -31,6 +31,7 @@ static inline cpumask_t target_cpus(void) #define phys_pkg_id (genapic->phys_pkg_id) #define vector_allocation_domain (genapic->vector_allocation_domain) #define read_apic_id (genapic->read_apic_id) +#define send_IPI_self (genapic->send_IPI_self) extern void setup_apic_routing(void); #else #define INT_DELIVERY_MODE dest_LowestPrio -- cgit v0.10.2 From 12a67cf6851871ca8df42025c94f140c303d0f7f Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:54 -0700 Subject: x64, x2apic/intr-remap: x2apic cluster mode support x2apic cluster mode support. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 55ff016..bde3e9b 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -95,6 +95,7 @@ obj-$(CONFIG_OLPC) += olpc.o # 64 bit specific files ifeq ($(CONFIG_X86_64),y) obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o + obj-y += genx2apic_cluster.o obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o obj-$(CONFIG_AUDIT) += audit_64.o diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 6657de6..1029e17 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -38,6 +38,8 @@ void __init setup_apic_routing(void) { if (uv_system_type == UV_NON_UNIQUE_APIC) genapic = &apic_x2apic_uv_x; + else if (cpu_has_x2apic && intr_remapping_enabled) + genapic = &apic_x2apic_cluster; else #ifdef CONFIG_ACPI /* diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c new file mode 100644 index 0000000..ed0fded --- /dev/null +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -0,0 +1,135 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid); + +/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ + +static cpumask_t x2apic_target_cpus(void) +{ + return cpumask_of_cpu(0); +} + +/* + * for now each logical cpu is in its own vector allocation domain. + */ +static cpumask_t x2apic_vector_allocation_domain(int cpu) +{ + cpumask_t domain = CPU_MASK_NONE; + cpu_set(cpu, domain); + return domain; +} + +static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, + unsigned int dest) +{ + unsigned long cfg; + + cfg = __prepare_ICR(0, vector, dest); + + /* + * send the IPI. + */ + x2apic_icr_write(cfg, apicid); +} + +/* + * for now, we send the IPI's one by one in the cpumask. + * TBD: Based on the cpu mask, we can send the IPI's to the cluster group + * at once. We have 16 cpu's in a cluster. This will minimize IPI register + * writes. + */ +static void x2apic_send_IPI_mask(cpumask_t mask, int vector) +{ + unsigned long flags; + unsigned long query_cpu; + + local_irq_save(flags); + for_each_cpu_mask(query_cpu, mask) { + __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_logical_apicid, query_cpu), + vector, APIC_DEST_LOGICAL); + } + local_irq_restore(flags); +} + +static void x2apic_send_IPI_allbutself(int vector) +{ + cpumask_t mask = cpu_online_map; + + cpu_clear(smp_processor_id(), mask); + + if (!cpus_empty(mask)) + x2apic_send_IPI_mask(mask, vector); +} + +static void x2apic_send_IPI_all(int vector) +{ + x2apic_send_IPI_mask(cpu_online_map, vector); +} + +static int x2apic_apic_id_registered(void) +{ + return 1; +} + +static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) +{ + int cpu; + + /* + * We're using fixed IRQ delivery, can only return one phys APIC ID. + * May as well be the first. + */ + cpu = first_cpu(cpumask); + if ((unsigned)cpu < NR_CPUS) + return per_cpu(x86_cpu_to_logical_apicid, cpu); + else + return BAD_APICID; +} + +static unsigned int x2apic_read_id(void) +{ + return apic_read(APIC_ID); +} + +static unsigned int phys_pkg_id(int index_msb) +{ + return x2apic_read_id() >> index_msb; +} + +static void x2apic_send_IPI_self(int vector) +{ + apic_write(APIC_SELF_IPI, vector); +} + +static void init_x2apic_ldr(void) +{ + int cpu = smp_processor_id(); + + per_cpu(x86_cpu_to_logical_apicid, cpu) = apic_read(APIC_LDR); + return; +} + +struct genapic apic_x2apic_cluster = { + .name = "cluster x2apic", + .int_delivery_mode = dest_LowestPrio, + .int_dest_mode = (APIC_DEST_LOGICAL != 0), + .target_cpus = x2apic_target_cpus, + .vector_allocation_domain = x2apic_vector_allocation_domain, + .apic_id_registered = x2apic_apic_id_registered, + .init_apic_ldr = init_x2apic_ldr, + .send_IPI_all = x2apic_send_IPI_all, + .send_IPI_allbutself = x2apic_send_IPI_allbutself, + .send_IPI_mask = x2apic_send_IPI_mask, + .send_IPI_self = x2apic_send_IPI_self, + .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, + .phys_pkg_id = phys_pkg_id, + .read_apic_id = x2apic_read_id, +}; diff --git a/include/asm-x86/genapic_64.h b/include/asm-x86/genapic_64.h index 6777d71..2324603 100644 --- a/include/asm-x86/genapic_64.h +++ b/include/asm-x86/genapic_64.h @@ -35,6 +35,7 @@ extern struct genapic *genapic; extern struct genapic apic_flat; extern struct genapic apic_physflat; +extern struct genapic apic_x2apic_cluster; extern int acpi_madt_oem_check(char *, char *); extern void apic_send_IPI_self(int vector); -- cgit v0.10.2 From 5c520a6724e912a7e6153b7597192edad6752750 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:55 -0700 Subject: x64, x2apic/intr-remap: setup init_apic_ldr for UV Signed-off-by: Suresh Siddha Signed-off-by: Jack Steiner Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 1ef99be..dcd4fb2 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -120,6 +120,10 @@ static int uv_apic_id_registered(void) return 1; } +static inline void uv_init_apic_ldr(void) +{ +} + static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask) { int cpu; @@ -164,6 +168,7 @@ struct genapic apic_x2apic_uv_x = { .target_cpus = uv_target_cpus, .vector_allocation_domain = uv_vector_allocation_domain,/* Fixme ZZZ */ .apic_id_registered = uv_apic_id_registered, + .init_apic_ldr = uv_init_apic_ldr, .send_IPI_all = uv_send_IPI_all, .send_IPI_allbutself = uv_send_IPI_allbutself, .send_IPI_mask = uv_send_IPI_mask, -- cgit v0.10.2 From 89027d35aa5b8f45ce0f7fa0911db85b46563da0 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:56 -0700 Subject: x64, x2apic/intr-remap: IO-APIC support for interrupt-remapping IO-APIC support in the presence of interrupt-remapping infrastructure. IO-APIC RTE will be programmed with interrupt-remapping table entry(IRTE) index and the IRTE will contain information about the vector, cpu destination, trigger mode etc, which traditionally was present in the IO-APIC RTE. Introduce a new irq_chip for cleaner irq migration (in the process context as opposed to the current irq migration in the context of an interrupt. interrupt-remapping infrastructure will help us achieve this cleanly). For edge triggered, irq migration is a simple atomic update(of vector and cpu destination) of IRTE and flush the hardware cache. For level triggered, we need to modify the io-apic RTE aswell with the update vector information, along with modifying IRTE with vector and cpu destination. So irq migration for level triggered is little bit more complex compared to edge triggered migration. But the good news is, we use the same algorithm for level triggered migration as we have today, only difference being, we now initiate the irq migration from process context instead of the interrupt context. In future, when we do a directed EOI (combined with cpu EOI broadcast suppression) to the IO-APIC, level triggered irq migration will also be as simple as edge triggered migration and we can do the irq migration with a simple atomic update to IO-APIC RTE. TBD: some tests/changes needed in the presence of fixup_irqs() for level triggered irq migration. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index a969ef7..d5c0691 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -46,6 +46,7 @@ static int disable_apic_timer __cpuinitdata; static int apic_calibrate_pmtmr __initdata; int disable_apic; +int x2apic; /* Local APIC timer works in C2 */ int local_apic_timer_c2_ok; diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index b62d42e..9bd02ef 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -37,6 +37,7 @@ #include #endif #include +#include #include #include @@ -48,6 +49,7 @@ #include #include #include +#include #include #include @@ -312,7 +314,12 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) pin = entry->pin; if (pin == -1) break; - io_apic_write(apic, 0x11 + pin*2, dest); + /* + * With interrupt-remapping, destination information comes + * from interrupt-remapping table entry. + */ + if (!irq_remapped(irq)) + io_apic_write(apic, 0x11 + pin*2, dest); reg = io_apic_read(apic, 0x10 + pin*2); reg &= ~IO_APIC_REDIR_VECTOR_MASK; reg |= vector; @@ -906,18 +913,98 @@ void setup_vector_irq(int cpu) static struct irq_chip ioapic_chip; +#ifdef CONFIG_INTR_REMAP +static struct irq_chip ir_ioapic_chip; +#endif static void ioapic_register_intr(int irq, unsigned long trigger) { - if (trigger) { + if (trigger) irq_desc[irq].status |= IRQ_LEVEL; - set_irq_chip_and_handler_name(irq, &ioapic_chip, - handle_fasteoi_irq, "fasteoi"); - } else { + else irq_desc[irq].status &= ~IRQ_LEVEL; + +#ifdef CONFIG_INTR_REMAP + if (irq_remapped(irq)) { + irq_desc[irq].status |= IRQ_MOVE_PCNTXT; + if (trigger) + set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, + handle_fasteoi_irq, + "fasteoi"); + else + set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, + handle_edge_irq, "edge"); + return; + } +#endif + if (trigger) + set_irq_chip_and_handler_name(irq, &ioapic_chip, + handle_fasteoi_irq, + "fasteoi"); + else set_irq_chip_and_handler_name(irq, &ioapic_chip, handle_edge_irq, "edge"); +} + +static int setup_ioapic_entry(int apic, int irq, + struct IO_APIC_route_entry *entry, + unsigned int destination, int trigger, + int polarity, int vector) +{ + /* + * add it to the IO-APIC irq-routing table: + */ + memset(entry,0,sizeof(*entry)); + +#ifdef CONFIG_INTR_REMAP + if (intr_remapping_enabled) { + struct intel_iommu *iommu = map_ioapic_to_ir(apic); + struct irte irte; + struct IR_IO_APIC_route_entry *ir_entry = + (struct IR_IO_APIC_route_entry *) entry; + int index; + + if (!iommu) + panic("No mapping iommu for ioapic %d\n", apic); + + index = alloc_irte(iommu, irq, 1); + if (index < 0) + panic("Failed to allocate IRTE for ioapic %d\n", apic); + + memset(&irte, 0, sizeof(irte)); + + irte.present = 1; + irte.dst_mode = INT_DEST_MODE; + irte.trigger_mode = trigger; + irte.dlvry_mode = INT_DELIVERY_MODE; + irte.vector = vector; + irte.dest_id = IRTE_DEST(destination); + + modify_irte(irq, &irte); + + ir_entry->index2 = (index >> 15) & 0x1; + ir_entry->zero = 0; + ir_entry->format = 1; + ir_entry->index = (index & 0x7fff); + } else +#endif + { + entry->delivery_mode = INT_DELIVERY_MODE; + entry->dest_mode = INT_DEST_MODE; + entry->dest = destination; } + + entry->mask = 0; /* enable IRQ */ + entry->trigger = trigger; + entry->polarity = polarity; + entry->vector = vector; + + /* Mask level triggered irqs. + * Use IRQ_DELAYED_DISABLE for edge triggered irqs. + */ + if (trigger) + entry->mask = 1; + return 0; } static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, @@ -942,24 +1029,15 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector, irq, trigger, polarity); - /* - * add it to the IO-APIC irq-routing table: - */ - memset(&entry,0,sizeof(entry)); - - entry.delivery_mode = INT_DELIVERY_MODE; - entry.dest_mode = INT_DEST_MODE; - entry.dest = cpu_mask_to_apicid(mask); - entry.mask = 0; /* enable IRQ */ - entry.trigger = trigger; - entry.polarity = polarity; - entry.vector = cfg->vector; - /* Mask level triggered irqs. - * Use IRQ_DELAYED_DISABLE for edge triggered irqs. - */ - if (trigger) - entry.mask = 1; + if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry, + cpu_mask_to_apicid(mask), trigger, polarity, + cfg->vector)) { + printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", + mp_ioapics[apic].mp_apicid, pin); + __clear_irq_vector(irq); + return; + } ioapic_register_intr(irq, trigger); if (irq < 16) @@ -1011,6 +1089,9 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, { struct IO_APIC_route_entry entry; + if (intr_remapping_enabled) + return; + memset(&entry, 0, sizeof(entry)); /* @@ -1466,6 +1547,147 @@ static int ioapic_retrigger_irq(unsigned int irq) */ #ifdef CONFIG_SMP + +#ifdef CONFIG_INTR_REMAP +static void ir_irq_migration(struct work_struct *work); + +static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration); + +/* + * Migrate the IO-APIC irq in the presence of intr-remapping. + * + * For edge triggered, irq migration is a simple atomic update(of vector + * and cpu destination) of IRTE and flush the hardware cache. + * + * For level triggered, we need to modify the io-apic RTE aswell with the update + * vector information, along with modifying IRTE with vector and destination. + * So irq migration for level triggered is little bit more complex compared to + * edge triggered migration. But the good news is, we use the same algorithm + * for level triggered migration as we have today, only difference being, + * we now initiate the irq migration from process context instead of the + * interrupt context. + * + * In future, when we do a directed EOI (combined with cpu EOI broadcast + * suppression) to the IO-APIC, level triggered irq migration will also be + * as simple as edge triggered migration and we can do the irq migration + * with a simple atomic update to IO-APIC RTE. + */ +static void migrate_ioapic_irq(int irq, cpumask_t mask) +{ + struct irq_cfg *cfg = irq_cfg + irq; + struct irq_desc *desc = irq_desc + irq; + cpumask_t tmp, cleanup_mask; + struct irte irte; + int modify_ioapic_rte = desc->status & IRQ_LEVEL; + unsigned int dest; + unsigned long flags; + + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) + return; + + if (get_irte(irq, &irte)) + return; + + if (assign_irq_vector(irq, mask)) + return; + + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); + + if (modify_ioapic_rte) { + spin_lock_irqsave(&ioapic_lock, flags); + __target_IO_APIC_irq(irq, dest, cfg->vector); + spin_unlock_irqrestore(&ioapic_lock, flags); + } + + irte.vector = cfg->vector; + irte.dest_id = IRTE_DEST(dest); + + /* + * Modified the IRTE and flushes the Interrupt entry cache. + */ + modify_irte(irq, &irte); + + if (cfg->move_in_progress) { + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); + cfg->move_cleanup_count = cpus_weight(cleanup_mask); + send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); + cfg->move_in_progress = 0; + } + + irq_desc[irq].affinity = mask; +} + +static int migrate_irq_remapped_level(int irq) +{ + int ret = -1; + + mask_IO_APIC_irq(irq); + + if (io_apic_level_ack_pending(irq)) { + /* + * Interrupt in progress. Migrating irq now will change the + * vector information in the IO-APIC RTE and that will confuse + * the EOI broadcast performed by cpu. + * So, delay the irq migration to the next instance. + */ + schedule_delayed_work(&ir_migration_work, 1); + goto unmask; + } + + /* everthing is clear. we have right of way */ + migrate_ioapic_irq(irq, irq_desc[irq].pending_mask); + + ret = 0; + irq_desc[irq].status &= ~IRQ_MOVE_PENDING; + cpus_clear(irq_desc[irq].pending_mask); + +unmask: + unmask_IO_APIC_irq(irq); + return ret; +} + +static void ir_irq_migration(struct work_struct *work) +{ + int irq; + + for (irq = 0; irq < NR_IRQS; irq++) { + struct irq_desc *desc = irq_desc + irq; + if (desc->status & IRQ_MOVE_PENDING) { + unsigned long flags; + + spin_lock_irqsave(&desc->lock, flags); + if (!desc->chip->set_affinity || + !(desc->status & IRQ_MOVE_PENDING)) { + desc->status &= ~IRQ_MOVE_PENDING; + spin_unlock_irqrestore(&desc->lock, flags); + continue; + } + + desc->chip->set_affinity(irq, + irq_desc[irq].pending_mask); + spin_unlock_irqrestore(&desc->lock, flags); + } + } +} + +/* + * Migrates the IRQ destination in the process context. + */ +static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) +{ + if (irq_desc[irq].status & IRQ_LEVEL) { + irq_desc[irq].status |= IRQ_MOVE_PENDING; + irq_desc[irq].pending_mask = mask; + migrate_irq_remapped_level(irq); + return; + } + + migrate_ioapic_irq(irq, mask); +} +#endif + asmlinkage void smp_irq_move_cleanup_interrupt(void) { unsigned vector, me; @@ -1522,6 +1744,17 @@ static void irq_complete_move(unsigned int irq) #else static inline void irq_complete_move(unsigned int irq) {} #endif +#ifdef CONFIG_INTR_REMAP +static void ack_x2apic_level(unsigned int irq) +{ + ack_x2APIC_irq(); +} + +static void ack_x2apic_edge(unsigned int irq) +{ + ack_x2APIC_irq(); +} +#endif static void ack_apic_edge(unsigned int irq) { @@ -1596,6 +1829,21 @@ static struct irq_chip ioapic_chip __read_mostly = { .retrigger = ioapic_retrigger_irq, }; +#ifdef CONFIG_INTR_REMAP +static struct irq_chip ir_ioapic_chip __read_mostly = { + .name = "IR-IO-APIC", + .startup = startup_ioapic_irq, + .mask = mask_IO_APIC_irq, + .unmask = unmask_IO_APIC_irq, + .ack = ack_x2apic_edge, + .eoi = ack_x2apic_level, +#ifdef CONFIG_SMP + .set_affinity = set_ir_ioapic_affinity_irq, +#endif + .retrigger = ioapic_retrigger_irq, +}; +#endif + static inline void init_IO_APIC_traps(void) { int irq; @@ -1783,6 +2031,8 @@ static inline void __init check_timer(void) * 8259A. */ if (pin1 == -1) { + if (intr_remapping_enabled) + panic("BIOS bug: timer not connected to IO-APIC"); pin1 = pin2; apic1 = apic2; no_pin1 = 1; @@ -1809,6 +2059,8 @@ static inline void __init check_timer(void) clear_IO_APIC_pin(0, pin1); goto out; } + if (intr_remapping_enabled) + panic("timer doesn't work through Interrupt-remapped IO-APIC"); clear_IO_APIC_pin(apic1, pin1); if (!no_pin1) apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: " @@ -2401,6 +2653,10 @@ void __init setup_ioapic_dest(void) setup_IO_APIC_irq(ioapic, pin, irq, irq_trigger(irq_entry), irq_polarity(irq_entry)); +#ifdef CONFIG_INTR_REMAP + else if (intr_remapping_enabled) + set_ir_ioapic_affinity_irq(irq, TARGET_CPUS); +#endif else set_ioapic_affinity_irq(irq, TARGET_CPUS); } diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index bddb4b1..32e55c7 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -220,6 +220,16 @@ int flush_irte(int irq) return 0; } +struct intel_iommu *map_ioapic_to_ir(int apic) +{ + int i; + + for (i = 0; i < MAX_IO_APICS; i++) + if (ir_ioapic[i].id == apic) + return ir_ioapic[i].iommu; + return NULL; +} + int free_irte(int irq) { int index, i; diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h index bb54928..aa74670 100644 --- a/include/asm-x86/apic.h +++ b/include/asm-x86/apic.h @@ -134,6 +134,15 @@ extern int get_physical_broadcast(void); # define apic_write_around(x, y) apic_write_atomic((x), (y)) #endif +#ifdef CONFIG_X86_64 +static inline void ack_x2APIC_irq(void) +{ + /* Docs say use 0 for future compatibility */ + native_apic_msr_write(APIC_EOI, 0); +} +#endif + + static inline void ack_APIC_irq(void) { /* diff --git a/include/asm-x86/io_apic.h b/include/asm-x86/io_apic.h index 1c4a99d..8dc2622 100644 --- a/include/asm-x86/io_apic.h +++ b/include/asm-x86/io_apic.h @@ -107,6 +107,20 @@ struct IO_APIC_route_entry { } __attribute__ ((packed)); +struct IR_IO_APIC_route_entry { + __u64 vector : 8, + zero : 3, + index2 : 1, + delivery_status : 1, + polarity : 1, + irr : 1, + trigger : 1, + mask : 1, + reserved : 31, + format : 1, + index : 15; +} __attribute__ ((packed)); + #ifdef CONFIG_X86_IO_APIC /* diff --git a/include/asm-x86/irq_remapping.h b/include/asm-x86/irq_remapping.h new file mode 100644 index 0000000..78242c6 --- /dev/null +++ b/include/asm-x86/irq_remapping.h @@ -0,0 +1,8 @@ +#ifndef _ASM_IRQ_REMAPPING_H +#define _ASM_IRQ_REMAPPING_H + +extern int x2apic; + +#define IRTE_DEST(dest) ((x2apic) ? dest : dest << 8) + +#endif diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 324bbca..bf41ffa 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -109,6 +109,7 @@ extern int flush_irte(int irq); extern int free_irte(int irq); extern int irq_remapped(int irq); +extern struct intel_iommu *map_ioapic_to_ir(int apic); #else #define irq_remapped(irq) (0) #define enable_intr_remapping(mode) (-1) -- cgit v0.10.2 From 75c46fa61bc5b4ccd20a168ff325c58771248fcd Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:57 -0700 Subject: x64, x2apic/intr-remap: MSI and MSI-X support for interrupt remapping infrastructure MSI and MSI-X support for interrupt remapping infrastructure. MSI address register will be programmed with interrupt-remapping table entry(IRTE) index and the IRTE will contain information about the vector, cpu destination, etc. For MSI-X, all the IRTE's will be consecutively allocated in the table, and the address registers will contain the starting index to the block and the data register will contain the subindex with in that block. This also introduces a new irq_chip for cleaner irq migration (in the process context as opposed to the current irq migration in the context of an interrupt. interrupt-remapping infrastructure will help us achieve this). As MSI is edge triggered, irq migration is a simple atomic update(of vector and cpu destination) of IRTE and flushing the hardware cache. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 9bd02ef..877aa2e 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -2297,6 +2297,9 @@ void destroy_irq(unsigned int irq) dynamic_irq_cleanup(irq); +#ifdef CONFIG_INTR_REMAP + free_irte(irq); +#endif spin_lock_irqsave(&vector_lock, flags); __clear_irq_vector(irq); spin_unlock_irqrestore(&vector_lock, flags); @@ -2315,11 +2318,42 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms tmp = TARGET_CPUS; err = assign_irq_vector(irq, tmp); - if (!err) { - cpus_and(tmp, cfg->domain, tmp); - dest = cpu_mask_to_apicid(tmp); + if (err) + return err; + + cpus_and(tmp, cfg->domain, tmp); + dest = cpu_mask_to_apicid(tmp); + +#ifdef CONFIG_INTR_REMAP + if (irq_remapped(irq)) { + struct irte irte; + int ir_index; + u16 sub_handle; + + ir_index = map_irq_to_irte_handle(irq, &sub_handle); + BUG_ON(ir_index == -1); + + memset (&irte, 0, sizeof(irte)); + + irte.present = 1; + irte.dst_mode = INT_DEST_MODE; + irte.trigger_mode = 0; /* edge */ + irte.dlvry_mode = INT_DELIVERY_MODE; + irte.vector = cfg->vector; + irte.dest_id = IRTE_DEST(dest); + + modify_irte(irq, &irte); msg->address_hi = MSI_ADDR_BASE_HI; + msg->data = sub_handle; + msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT | + MSI_ADDR_IR_SHV | + MSI_ADDR_IR_INDEX1(ir_index) | + MSI_ADDR_IR_INDEX2(ir_index); + } else +#endif + { + msg->address_hi = MSI_ADDR_BASE_HI; msg->address_lo = MSI_ADDR_BASE_LO | ((INT_DEST_MODE == 0) ? @@ -2369,6 +2403,55 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) write_msi_msg(irq, &msg); irq_desc[irq].affinity = mask; } + +#ifdef CONFIG_INTR_REMAP +/* + * Migrate the MSI irq to another cpumask. This migration is + * done in the process context using interrupt-remapping hardware. + */ +static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) +{ + struct irq_cfg *cfg = irq_cfg + irq; + unsigned int dest; + cpumask_t tmp, cleanup_mask; + struct irte irte; + + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) + return; + + if (get_irte(irq, &irte)) + return; + + if (assign_irq_vector(irq, mask)) + return; + + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); + + irte.vector = cfg->vector; + irte.dest_id = IRTE_DEST(dest); + + /* + * atomically update the IRTE with the new destination and vector. + */ + modify_irte(irq, &irte); + + /* + * After this point, all the interrupts will start arriving + * at the new destination. So, time to cleanup the previous + * vector allocation. + */ + if (cfg->move_in_progress) { + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); + cfg->move_cleanup_count = cpus_weight(cleanup_mask); + send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); + cfg->move_in_progress = 0; + } + + irq_desc[irq].affinity = mask; +} +#endif #endif /* CONFIG_SMP */ /* @@ -2386,26 +2469,157 @@ static struct irq_chip msi_chip = { .retrigger = ioapic_retrigger_irq, }; -int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) +#ifdef CONFIG_INTR_REMAP +static struct irq_chip msi_ir_chip = { + .name = "IR-PCI-MSI", + .unmask = unmask_msi_irq, + .mask = mask_msi_irq, + .ack = ack_x2apic_edge, +#ifdef CONFIG_SMP + .set_affinity = ir_set_msi_irq_affinity, +#endif + .retrigger = ioapic_retrigger_irq, +}; + +/* + * Map the PCI dev to the corresponding remapping hardware unit + * and allocate 'nvec' consecutive interrupt-remapping table entries + * in it. + */ +static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec) +{ + struct intel_iommu *iommu; + int index; + + iommu = map_dev_to_ir(dev); + if (!iommu) { + printk(KERN_ERR + "Unable to map PCI %s to iommu\n", pci_name(dev)); + return -ENOENT; + } + + index = alloc_irte(iommu, irq, nvec); + if (index < 0) { + printk(KERN_ERR + "Unable to allocate %d IRTE for PCI %s\n", nvec, + pci_name(dev)); + return -ENOSPC; + } + return index; +} +#endif + +static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) { + int ret; struct msi_msg msg; + + ret = msi_compose_msg(dev, irq, &msg); + if (ret < 0) + return ret; + + set_irq_msi(irq, desc); + write_msi_msg(irq, &msg); + +#ifdef CONFIG_INTR_REMAP + if (irq_remapped(irq)) { + struct irq_desc *desc = irq_desc + irq; + /* + * irq migration in process context + */ + desc->status |= IRQ_MOVE_PCNTXT; + set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge"); + } else +#endif + set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); + + return 0; +} + +int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) +{ int irq, ret; + irq = create_irq(); if (irq < 0) return irq; - ret = msi_compose_msg(dev, irq, &msg); +#ifdef CONFIG_INTR_REMAP + if (!intr_remapping_enabled) + goto no_ir; + + ret = msi_alloc_irte(dev, irq, 1); + if (ret < 0) + goto error; +no_ir: +#endif + ret = setup_msi_irq(dev, desc, irq); if (ret < 0) { destroy_irq(irq); return ret; } + return 0; - set_irq_msi(irq, desc); - write_msi_msg(irq, &msg); +#ifdef CONFIG_INTR_REMAP +error: + destroy_irq(irq); + return ret; +#endif +} - set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); +int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +{ + int irq, ret, sub_handle; + struct msi_desc *desc; +#ifdef CONFIG_INTR_REMAP + struct intel_iommu *iommu = 0; + int index = 0; +#endif + sub_handle = 0; + list_for_each_entry(desc, &dev->msi_list, list) { + irq = create_irq(); + if (irq < 0) + return irq; +#ifdef CONFIG_INTR_REMAP + if (!intr_remapping_enabled) + goto no_ir; + + if (!sub_handle) { + /* + * allocate the consecutive block of IRTE's + * for 'nvec' + */ + index = msi_alloc_irte(dev, irq, nvec); + if (index < 0) { + ret = index; + goto error; + } + } else { + iommu = map_dev_to_ir(dev); + if (!iommu) { + ret = -ENOENT; + goto error; + } + /* + * setup the mapping between the irq and the IRTE + * base index, the sub_handle pointing to the + * appropriate interrupt remap table entry. + */ + set_irte_irq(irq, iommu, index, sub_handle); + } +no_ir: +#endif + ret = setup_msi_irq(dev, desc, irq); + if (ret < 0) + goto error; + sub_handle++; + } return 0; + +error: + destroy_irq(irq); + return ret; } void arch_teardown_msi_irq(unsigned int irq) diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index 32e55c7..bb642cc 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -230,6 +230,17 @@ struct intel_iommu *map_ioapic_to_ir(int apic) return NULL; } +struct intel_iommu *map_dev_to_ir(struct pci_dev *dev) +{ + struct dmar_drhd_unit *drhd; + + drhd = dmar_find_matched_drhd_unit(dev); + if (!drhd) + return NULL; + + return drhd->iommu; +} + int free_irte(int irq) { int index, i; diff --git a/include/asm-x86/msidef.h b/include/asm-x86/msidef.h index 296f29c..57fd859 100644 --- a/include/asm-x86/msidef.h +++ b/include/asm-x86/msidef.h @@ -48,4 +48,8 @@ #define MSI_ADDR_DEST_ID(dest) (((dest) << MSI_ADDR_DEST_ID_SHIFT) & \ MSI_ADDR_DEST_ID_MASK) +#define MSI_ADDR_IR_EXT_INT (1 << 4) +#define MSI_ADDR_IR_SHV (1 << 3) +#define MSI_ADDR_IR_INDEX1(index) ((index & 0x8000) >> 13) +#define MSI_ADDR_IR_INDEX2(index) ((index & 0x7fff) << 5) #endif /* ASM_MSIDEF_H */ diff --git a/include/linux/dmar.h b/include/linux/dmar.h index bf41ffa..c360c55 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -109,6 +109,7 @@ extern int flush_irte(int irq); extern int free_irte(int irq); extern int irq_remapped(int irq); +extern struct intel_iommu *map_dev_to_ir(struct pci_dev *dev); extern struct intel_iommu *map_ioapic_to_ir(int apic); #else #define irq_remapped(irq) (0) -- cgit v0.10.2 From 6e1cb38a2aef7680975e71f23de187859ee8b158 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:58 -0700 Subject: x64, x2apic/intr-remap: add x2apic support, including enabling interrupt-remapping x2apic support. Interrupt-remapping must be enabled before enabling x2apic, this is needed to ensure that IO interrupts continue to work properly after the cpu mode is changed to x2apic(which uses 32bit extended physical/cluster apic id). On systems where apicid's are > 255, BIOS can handover the control to OS in x2apic mode. Or if the OS handover was in legacy xapic mode, check if it is capable of x2apic mode. And if we succeed in enabling Interrupt-remapping, then we can enable x2apic mode in the CPU. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 795c487..56689ef 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1377,6 +1377,8 @@ and is between 256 and 4096 characters. It is defined in the file nolapic_timer [X86-32,APIC] Do not use the local APIC timer. + nox2apic [X86-64,APIC] Do not enable x2APIC mode. + noltlbs [PPC] Do not use large page/tlb entries for kernel lowmem mapping on PPC40x. diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 785700a..8705262 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1337,7 +1337,9 @@ static void __init acpi_process_madt(void) acpi_ioapic = 1; smp_found_config = 1; +#ifdef CONFIG_X86_32 setup_apic_routing(); +#endif } } if (error == -EINVAL) { diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index d5c0691..dd05010 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -39,6 +40,7 @@ #include #include #include +#include #include #include @@ -46,8 +48,12 @@ static int disable_apic_timer __cpuinitdata; static int apic_calibrate_pmtmr __initdata; int disable_apic; +int disable_x2apic; int x2apic; +/* x2apic enabled before OS handover */ +int x2apic_preenabled; + /* Local APIC timer works in C2 */ int local_apic_timer_c2_ok; EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); @@ -896,6 +902,125 @@ void __cpuinit end_local_APIC_setup(void) apic_pm_activate(); } +void check_x2apic(void) +{ + int msr, msr2; + + rdmsr(MSR_IA32_APICBASE, msr, msr2); + + if (msr & X2APIC_ENABLE) { + printk("x2apic enabled by BIOS, switching to x2apic ops\n"); + x2apic_preenabled = x2apic = 1; + apic_ops = &x2apic_ops; + } +} + +void enable_x2apic(void) +{ + int msr, msr2; + + rdmsr(MSR_IA32_APICBASE, msr, msr2); + if (!(msr & X2APIC_ENABLE)) { + printk("Enabling x2apic\n"); + wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0); + } +} + +void enable_IR_x2apic(void) +{ +#ifdef CONFIG_INTR_REMAP + int ret; + unsigned long flags; + + if (!cpu_has_x2apic) + return; + + if (!x2apic_preenabled && disable_x2apic) { + printk(KERN_INFO + "Skipped enabling x2apic and Interrupt-remapping " + "because of nox2apic\n"); + return; + } + + if (x2apic_preenabled && disable_x2apic) + panic("Bios already enabled x2apic, can't enforce nox2apic"); + + if (!x2apic_preenabled && skip_ioapic_setup) { + printk(KERN_INFO + "Skipped enabling x2apic and Interrupt-remapping " + "because of skipping io-apic setup\n"); + return; + } + + ret = dmar_table_init(); + if (ret) { + printk(KERN_INFO + "dmar_table_init() failed with %d:\n", ret); + + if (x2apic_preenabled) + panic("x2apic enabled by bios. But IR enabling failed"); + else + printk(KERN_INFO + "Not enabling x2apic,Intr-remapping\n"); + return; + } + + local_irq_save(flags); + mask_8259A(); + save_mask_IO_APIC_setup(); + + ret = enable_intr_remapping(1); + + if (ret && x2apic_preenabled) { + local_irq_restore(flags); + panic("x2apic enabled by bios. But IR enabling failed"); + } + + if (ret) + goto end; + + if (!x2apic) { + x2apic = 1; + apic_ops = &x2apic_ops; + enable_x2apic(); + } +end: + if (ret) + /* + * IR enabling failed + */ + restore_IO_APIC_setup(); + else + reinit_intr_remapped_IO_APIC(x2apic_preenabled); + + unmask_8259A(); + local_irq_restore(flags); + + if (!ret) { + if (!x2apic_preenabled) + printk(KERN_INFO + "Enabled x2apic and interrupt-remapping\n"); + else + printk(KERN_INFO + "Enabled Interrupt-remapping\n"); + } else + printk(KERN_ERR + "Failed to enable Interrupt-remapping and x2apic\n"); +#else + if (!cpu_has_x2apic) + return; + + if (x2apic_preenabled) + panic("x2apic enabled prior OS handover," + " enable CONFIG_INTR_REMAP"); + + printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping " + " and x2apic\n"); +#endif + + return; +} + /* * Detect and enable local APICs on non-SMP boards. * Original code written by Keir Fraser. @@ -943,6 +1068,11 @@ void __init early_init_lapic_mapping(void) */ void __init init_apic_mappings(void) { + if (x2apic) { + boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); + return; + } + /* * If no local APIC can be found then set up a fake all * zeroes page to simulate the local APIC and another @@ -981,6 +1111,9 @@ int __init APIC_init_uniprocessor(void) return -1; } + enable_IR_x2apic(); + setup_apic_routing(); + verify_local_APIC(); connect_bsp_APIC(); @@ -1238,10 +1371,14 @@ static int lapic_resume(struct sys_device *dev) maxlvt = lapic_get_maxlvt(); local_irq_save(flags); - rdmsr(MSR_IA32_APICBASE, l, h); - l &= ~MSR_IA32_APICBASE_BASE; - l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; - wrmsr(MSR_IA32_APICBASE, l, h); + if (!x2apic) { + rdmsr(MSR_IA32_APICBASE, l, h); + l &= ~MSR_IA32_APICBASE_BASE; + l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; + wrmsr(MSR_IA32_APICBASE, l, h); + } else + enable_x2apic(); + apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); apic_write(APIC_ID, apic_pm_state.apic_id); apic_write(APIC_DFR, apic_pm_state.apic_dfr); @@ -1381,6 +1518,15 @@ __cpuinit int apic_is_clustered_box(void) return (clusters > 2); } +static __init int setup_nox2apic(char *str) +{ + disable_x2apic = 1; + clear_cpu_cap(&boot_cpu_data, X86_FEATURE_X2APIC); + return 0; +} +early_param("nox2apic", setup_nox2apic); + + /* * APIC command line parameters */ diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 36537ab..e7bf3c2 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -606,6 +606,8 @@ void __cpuinit cpu_init(void) barrier(); check_efer(); + if (cpu != 0 && x2apic) + enable_x2apic(); /* * set up and load the per-CPU TSS diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 1029e17..792e21b 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 3b25e49..70e1f3e 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -545,7 +545,9 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) generic_bigsmp_probe(); #endif +#ifdef CONFIG_X86_32 setup_apic_routing(); +#endif if (!num_processors) printk(KERN_ERR "MPTABLE: no processors registered!\n"); return num_processors; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 987b6fd..2e78a14 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -730,6 +730,8 @@ void __init setup_arch(char **cmdline_p) num_physpages = max_pfn; check_efer(); + if (cpu_has_x2apic) + check_x2apic(); /* How many end-of-memory variables you have, grandma! */ /* need this before calling reserve_initrd */ diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index c55263b..0c43e1f 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1145,6 +1145,11 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) current_thread_info()->cpu = 0; /* needed? */ set_cpu_sibling_map(0); +#ifdef CONFIG_X86_64 + enable_IR_x2apic(); + setup_apic_routing(); +#endif + if (smp_sanity_check(max_cpus) < 0) { printk(KERN_INFO "SMP disabled\n"); disable_smp(); diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h index aa74670..129752d 100644 --- a/include/asm-x86/apic.h +++ b/include/asm-x86/apic.h @@ -100,6 +100,11 @@ extern void apic_wait_icr_idle(void); extern u32 safe_apic_wait_icr_idle(void); extern void apic_icr_write(u32 low, u32 id); #else +extern int x2apic, x2apic_preenabled; +extern void check_x2apic(void); +extern void enable_x2apic(void); +extern void enable_IR_x2apic(void); +extern void x2apic_icr_write(u32 low, u32 id); struct apic_ops { u32 (*read)(u32 reg); -- cgit v0.10.2 From 2d9579a124d746a3e0e0ba45e57d80800ee80807 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:16:59 -0700 Subject: x64, x2apic/intr-remap: support for x2apic physical mode support x2apic Physical mode support. By default we will use x2apic cluster mode. x2apic physical mode can be selected using "x2apic_phys" boot parameter. Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index bde3e9b..81280e9 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -96,6 +96,7 @@ obj-$(CONFIG_OLPC) += olpc.o ifeq ($(CONFIG_X86_64),y) obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o obj-y += genx2apic_cluster.o + obj-y += genx2apic_phys.o obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o obj-$(CONFIG_AUDIT) += audit_64.o diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 792e21b..3940d81 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -30,6 +30,15 @@ DEFINE_PER_CPU(int, x2apic_extra_bits); struct genapic __read_mostly *genapic = &apic_flat; +static int x2apic_phys = 0; + +static int set_x2apic_phys_mode(char *arg) +{ + x2apic_phys = 1; + return 0; +} +early_param("x2apic_phys", set_x2apic_phys_mode); + static enum uv_system_type uv_system_type; /* @@ -39,9 +48,12 @@ void __init setup_apic_routing(void) { if (uv_system_type == UV_NON_UNIQUE_APIC) genapic = &apic_x2apic_uv_x; - else if (cpu_has_x2apic && intr_remapping_enabled) - genapic = &apic_x2apic_cluster; - else + else if (cpu_has_x2apic && intr_remapping_enabled) { + if (x2apic_phys) + genapic = &apic_x2apic_phys; + else + genapic = &apic_x2apic_cluster; + } else #ifdef CONFIG_ACPI /* * Quirk: some x86_64 machines can only use physical APIC mode diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c new file mode 100644 index 0000000..3c70b9d --- /dev/null +++ b/arch/x86/kernel/genx2apic_phys.c @@ -0,0 +1,122 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ + +static cpumask_t x2apic_target_cpus(void) +{ + return cpumask_of_cpu(0); +} + +static cpumask_t x2apic_vector_allocation_domain(int cpu) +{ + cpumask_t domain = CPU_MASK_NONE; + cpu_set(cpu, domain); + return domain; +} + +static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, + unsigned int dest) +{ + unsigned long cfg; + + cfg = __prepare_ICR(0, vector, dest); + + /* + * send the IPI. + */ + x2apic_icr_write(cfg, apicid); +} + +static void x2apic_send_IPI_mask(cpumask_t mask, int vector) +{ + unsigned long flags; + unsigned long query_cpu; + + local_irq_save(flags); + for_each_cpu_mask(query_cpu, mask) { + __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), + vector, APIC_DEST_PHYSICAL); + } + local_irq_restore(flags); +} + +static void x2apic_send_IPI_allbutself(int vector) +{ + cpumask_t mask = cpu_online_map; + + cpu_clear(smp_processor_id(), mask); + + if (!cpus_empty(mask)) + x2apic_send_IPI_mask(mask, vector); +} + +static void x2apic_send_IPI_all(int vector) +{ + x2apic_send_IPI_mask(cpu_online_map, vector); +} + +static int x2apic_apic_id_registered(void) +{ + return 1; +} + +static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) +{ + int cpu; + + /* + * We're using fixed IRQ delivery, can only return one phys APIC ID. + * May as well be the first. + */ + cpu = first_cpu(cpumask); + if ((unsigned)cpu < NR_CPUS) + return per_cpu(x86_cpu_to_apicid, cpu); + else + return BAD_APICID; +} + +static unsigned int x2apic_read_id(void) +{ + return apic_read(APIC_ID); +} + +static unsigned int phys_pkg_id(int index_msb) +{ + return x2apic_read_id() >> index_msb; +} + +void x2apic_send_IPI_self(int vector) +{ + apic_write(APIC_SELF_IPI, vector); +} + +void init_x2apic_ldr(void) +{ + return; +} + +struct genapic apic_x2apic_phys = { + .name = "physical x2apic", + .int_delivery_mode = dest_Fixed, + .int_dest_mode = (APIC_DEST_PHYSICAL != 0), + .target_cpus = x2apic_target_cpus, + .vector_allocation_domain = x2apic_vector_allocation_domain, + .apic_id_registered = x2apic_apic_id_registered, + .init_apic_ldr = init_x2apic_ldr, + .send_IPI_all = x2apic_send_IPI_all, + .send_IPI_allbutself = x2apic_send_IPI_allbutself, + .send_IPI_mask = x2apic_send_IPI_mask, + .send_IPI_self = x2apic_send_IPI_self, + .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, + .phys_pkg_id = phys_pkg_id, + .read_apic_id = x2apic_read_id, +}; diff --git a/include/asm-x86/genapic_64.h b/include/asm-x86/genapic_64.h index 2324603..122b924 100644 --- a/include/asm-x86/genapic_64.h +++ b/include/asm-x86/genapic_64.h @@ -36,6 +36,7 @@ extern struct genapic *genapic; extern struct genapic apic_flat; extern struct genapic apic_physflat; extern struct genapic apic_x2apic_cluster; +extern struct genapic apic_x2apic_phys; extern int acpi_madt_oem_check(char *, char *); extern void apic_send_IPI_self(int vector); -- cgit v0.10.2 From 9fa8c481b55e80edd8c637573f87853bb6b600f5 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 11:17:00 -0700 Subject: x64, x2apic/intr-remap: introduce CONFIG_INTR_REMAP Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Cc: arjan@linux.intel.com Cc: andi@firstfloor.org Cc: ebiederm@xmission.com Cc: jbarnes@virtuousgeek.org Cc: steiner@sgi.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 2cfccc9..0bf8391 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1651,6 +1651,14 @@ config DMAR_FLOPPY_WA workaround will setup a 1:1 mapping for the first 16M to make floppy (an ISA device) work. +config INTR_REMAP + bool "Support for Interrupt Remapping (EXPERIMENTAL)" + depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI && EXPERIMENTAL + help + Supports Interrupt remapping for IO-APIC and MSI devices. + To use x2apic mode in the CPU's which support x2APIC enhancements or + to support platforms with CPU's having > 8 bit APIC ID, say Y. + source "drivers/pci/pcie/Kconfig" source "drivers/pci/Kconfig" -- cgit v0.10.2 From 372e92d8b3e433888bf76c36f1c7e1405eae1584 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 10 Jul 2008 14:56:18 -0700 Subject: x64, x2apic/intr-remap: Interrupt-remapping and x2apic support On Thu, Jul 10, 2008 at 12:53:20PM -0700, Ingo Molnar wrote: > > Btw., i threw it at the -tip test-cluster and got back a quick build > bugreport: > > arch/x86/xen/enlighten.c: In function 'xen_patch': > arch/x86/xen/enlighten.c:1084: warning: label 'patch_site' defined but not used > arch/x86/xen/enlighten.c: At top level: > arch/x86/xen/enlighten.c:1272: error: expected identifier before '(' token > arch/x86/xen/enlighten.c:1273: error: expected '}' before '.' token > arch/x86/kernel/paravirt.c:376:2: error: invalid preprocessing directive > #ifndedarch/x86/kernel/paravirt.c:384:2: error: #endif without #if > > with this config: > > http://redhat.com/~mingo/misc/config-Thu_Jul_10_21_43_28_CEST_2008.bad fix the typo. Signed-off-by: Suresh Siddha Cc: "Siddha Cc: Suresh B" Cc: "akpm@linux-foundation.org" Cc: "arjan@linux.intel.com" Cc: "andi@firstfloor.org" Cc: "ebiederm@xmission.com" Cc: "jbarnes@virtuousgeek.org" Cc: "steiner@sgi.com" Cc: jeremy@goop.org Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index b80105a..4f29ff8 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -360,7 +360,7 @@ struct pv_cpu_ops pv_cpu_ops = { struct pv_apic_ops pv_apic_ops = { #ifdef CONFIG_X86_LOCAL_APIC -#ifnded CONFIG_X86_64 +#ifndef CONFIG_X86_64 .apic_write = native_apic_mem_write, .apic_write_atomic = native_apic_mem_write_atomic, .apic_read = native_apic_mem_read, -- cgit v0.10.2 From 277d1f5846d84e16760131a93b7a67ebfa8eded4 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Fri, 11 Jul 2008 13:11:55 -0700 Subject: x2apic: uninline uv_init_apic_ldr() Andrew says: > There's no point in declaring it inline if it's always called indirectly. And point taken! Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index dcd4fb2..c915f75 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -120,7 +120,7 @@ static int uv_apic_id_registered(void) return 1; } -static inline void uv_init_apic_ldr(void) +static void uv_init_apic_ldr(void) { } -- cgit v0.10.2 From ad66dd340f561bdde2285992314d9e4fd9b6191e Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Fri, 11 Jul 2008 13:11:56 -0700 Subject: x2apic: xen64 paravirt basic apic ops Define the Xen specific basic apic ops, in additon to paravirt apic ops, with some misc warning fixes. Signed-off-by: Suresh Siddha Cc: Jeremy Fitzhardinge Cc: akpm@linux-foundation.org Signed-off-by: Ingo Molnar diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 50dad44..0c45df2 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -783,11 +783,11 @@ static void lguest_wbinvd(void) * code qualifies for Advanced. It will also never interrupt anything. It * does, however, allow us to get through the Linux boot code. */ #ifdef CONFIG_X86_LOCAL_APIC -static void lguest_apic_write(unsigned long reg, u32 v) +static void lguest_apic_write(u32 reg, u32 v) { } -static u32 lguest_apic_read(unsigned long reg) +static u32 lguest_apic_read(u32 reg) { return 0; } diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index dcd4e51..54e2556 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -548,16 +548,45 @@ static void xen_io_delay(void) } #ifdef CONFIG_X86_LOCAL_APIC -static u32 xen_apic_read(unsigned long reg) +static u32 xen_apic_read(u32 reg) { return 0; } -static void xen_apic_write(unsigned long reg, u32 val) +static void xen_apic_write(u32 reg, u32 val) { /* Warn to see if there's any stray references */ WARN_ON(1); } + +#ifdef CONFIG_X86_64 +static u64 xen_apic_icr_read(void) +{ + return 0; +} + +static void xen_apic_icr_write(u32 low, u32 id) +{ + /* Warn to see if there's any stray references */ + WARN_ON(1); +} + +static void xen_apic_wait_icr_idle(void) +{ + return; +} + +static struct apic_ops xen_basic_apic_ops = { + .read = xen_apic_read, + .write = xen_apic_write, + .write_atomic = xen_apic_write, + .icr_read = xen_apic_icr_read, + .icr_write = xen_apic_icr_write, + .wait_icr_idle = xen_apic_wait_icr_idle, + .safe_wait_icr_idle = xen_apic_wait_icr_idle, +}; +#endif + #endif static void xen_flush_tlb(void) @@ -1130,9 +1159,11 @@ static const struct pv_irq_ops xen_irq_ops __initdata = { static const struct pv_apic_ops xen_apic_ops __initdata = { #ifdef CONFIG_X86_LOCAL_APIC +#ifndef CONFIG_X86_64 .apic_write = xen_apic_write, .apic_write_atomic = xen_apic_write, .apic_read = xen_apic_read, +#endif .setup_boot_clock = paravirt_nop, .setup_secondary_clock = paravirt_nop, .startup_ipi_hook = paravirt_nop, @@ -1291,6 +1322,12 @@ asmlinkage void __init xen_start_kernel(void) pv_irq_ops = xen_irq_ops; pv_apic_ops = xen_apic_ops; pv_mmu_ops = xen_mmu_ops; +#ifdef CONFIG_X86_64 + /* + * for 64bit, set up the basic apic ops aswell. + */ + apic_ops = &xen_basic_apic_ops; +#endif if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) { pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start; diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h index 10adac0..5e34d26 100644 --- a/include/asm-x86/paravirt.h +++ b/include/asm-x86/paravirt.h @@ -200,13 +200,15 @@ struct pv_irq_ops { struct pv_apic_ops { #ifdef CONFIG_X86_LOCAL_APIC +#ifndef CONFIG_X86_64 /* * Direct APIC operations, principally for VMI. Ideally * these shouldn't be in this interface. */ - void (*apic_write)(unsigned long reg, u32 v); - void (*apic_write_atomic)(unsigned long reg, u32 v); - u32 (*apic_read)(unsigned long reg); + void (*apic_write)(u32 reg, u32 v); + void (*apic_write_atomic)(u32 reg, u32 v); + u32 (*apic_read)(u32 reg); +#endif void (*setup_boot_clock)(void); void (*setup_secondary_clock)(void); @@ -892,17 +894,17 @@ static inline void slow_down_io(void) * Basic functions accessing APICs. */ #ifndef CONFIG_X86_64 -static inline void apic_write(unsigned long reg, u32 v) +static inline void apic_write(u32 reg, u32 v) { PVOP_VCALL2(pv_apic_ops.apic_write, reg, v); } -static inline void apic_write_atomic(unsigned long reg, u32 v) +static inline void apic_write_atomic(u32 reg, u32 v) { PVOP_VCALL2(pv_apic_ops.apic_write_atomic, reg, v); } -static inline u32 apic_read(unsigned long reg) +static inline u32 apic_read(u32 reg) { return PVOP_CALL1(unsigned long, pv_apic_ops.apic_read, reg); } -- cgit v0.10.2 From af9d13887f9e3699bbc852c39b076d30bb9dff2f Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Fri, 11 Jul 2008 13:11:57 -0700 Subject: x2apic: kernel-parameter documentation for "x2apic_phys" Signed-off-by: Suresh Siddha Cc: akpm@linux-foundation.org Signed-off-by: Ingo Molnar diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 56689ef..88b08ac 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1379,6 +1379,10 @@ and is between 256 and 4096 characters. It is defined in the file nox2apic [X86-64,APIC] Do not enable x2APIC mode. + x2apic_phys [X86-64,APIC] Use x2apic physical mode instead of + default x2apic cluster mode on platforms + supporting x2apic. + noltlbs [PPC] Do not use large page/tlb entries for kernel lowmem mapping on PPC40x. -- cgit v0.10.2 From c535b6a1a685eb23f96e2c221777d6c1e05080d5 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 11 Jul 2008 18:41:54 -0700 Subject: x86: let 32bit use apic_ops too Signed-off-by: Yinghai Lu Cc: Suresh Siddha Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 2a83c07..7413354 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -145,19 +145,13 @@ static int modern_apic(void) return lapic_get_version() >= 0x14; } -void apic_icr_write(u32 low, u32 id) -{ - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(id)); - apic_write_around(APIC_ICR, low); -} - -void apic_wait_icr_idle(void) +void xapic_wait_icr_idle(void) { while (apic_read(APIC_ICR) & APIC_ICR_BUSY) cpu_relax(); } -u32 safe_apic_wait_icr_idle(void) +u32 safe_xapic_wait_icr_idle(void) { u32 send_status; int timeout; @@ -173,6 +167,35 @@ u32 safe_apic_wait_icr_idle(void) return send_status; } +void xapic_icr_write(u32 low, u32 id) +{ + apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(id)); + apic_write_around(APIC_ICR, low); +} + +u64 xapic_icr_read(void) +{ + u32 icr1, icr2; + + icr2 = apic_read(APIC_ICR2); + icr1 = apic_read(APIC_ICR); + + return icr1 | ((u64)icr2 << 32); +} + +static struct apic_ops xapic_ops = { + .read = native_apic_mem_read, + .write = native_apic_mem_write, + .write_atomic = native_apic_mem_write_atomic, + .icr_read = xapic_icr_read, + .icr_write = xapic_icr_write, + .wait_icr_idle = xapic_wait_icr_idle, + .safe_wait_icr_idle = safe_xapic_wait_icr_idle, +}; + +struct apic_ops __read_mostly *apic_ops = &xapic_ops; +EXPORT_SYMBOL_GPL(apic_ops); + /** * enable_NMI_through_LVT0 - enable NMI through local vector table 0 */ diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h index 129752d..fcd2f01 100644 --- a/include/asm-x86/apic.h +++ b/include/asm-x86/apic.h @@ -49,11 +49,6 @@ extern int disable_apic; #ifdef CONFIG_PARAVIRT #include #else -#ifndef CONFIG_X86_64 -#define apic_write native_apic_mem_write -#define apic_write_atomic native_apic_mem_write_atomic -#define apic_read native_apic_mem_read -#endif #define setup_boot_clock setup_boot_APIC_clock #define setup_secondary_clock setup_secondary_APIC_clock #endif @@ -95,16 +90,13 @@ static inline u32 native_apic_msr_read(u32 reg) return low; } -#ifdef CONFIG_X86_32 -extern void apic_wait_icr_idle(void); -extern u32 safe_apic_wait_icr_idle(void); -extern void apic_icr_write(u32 low, u32 id); -#else +#ifndef CONFIG_X86_32 extern int x2apic, x2apic_preenabled; extern void check_x2apic(void); extern void enable_x2apic(void); extern void enable_IR_x2apic(void); extern void x2apic_icr_write(u32 low, u32 id); +#endif struct apic_ops { u32 (*read)(u32 reg); @@ -125,7 +117,6 @@ extern struct apic_ops *apic_ops; #define apic_icr_write (apic_ops->icr_write) #define apic_wait_icr_idle (apic_ops->wait_icr_idle) #define safe_apic_wait_icr_idle (apic_ops->safe_wait_icr_idle) -#endif extern int get_physical_broadcast(void); -- cgit v0.10.2 From 4696ca5bfd2697f5686f96d59cf0b6de14869b4e Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 11 Jul 2008 18:43:10 -0700 Subject: x86: mach_apicdef.h need to include before smp.h smp.h internal has include, so need to include that at first when genericarch use them need to have different apicdef.h Signed-off-by: Yinghai Lu Cc: Suresh Siddha Signed-off-by: Ingo Molnar diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 59d7717..b31f2800 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -5,17 +5,16 @@ #define APIC_DEFINITION 1 #include #include -#include #include #include #include #include #include -#include #include #include -#include #include +#include +#include #include #include diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 4742626..9b30547 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -4,16 +4,15 @@ #define APIC_DEFINITION 1 #include #include -#include #include #include #include #include #include #include -#include #include #include +#include #include #include #include diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 8091e68..95c07ef 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -4,7 +4,6 @@ #define APIC_DEFINITION 1 #include #include -#include #include #include #include @@ -12,8 +11,9 @@ #include #include #include -#include #include +#include +#include #include #include #include diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index a97ea0f..752edd9 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -4,17 +4,16 @@ #define APIC_DEFINITION 1 #include #include -#include #include #include #include #include #include #include -#include #include -#include #include +#include +#include #include #include -- cgit v0.10.2 From 4c9961d56ec20c27ec5d02e49fd7427748312741 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 11 Jul 2008 18:44:16 -0700 Subject: x86: make read_apic_id return final apicid also remove GET_APIC_ID when read_apic_id is used. need to apply after [PATCH] x86: mach_apicdef.h need to include before smp.h Signed-off-by: Yinghai Lu Cc: Suresh Siddha Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 8705262..b314bcd 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -761,7 +761,7 @@ static void __init acpi_register_lapic_address(unsigned long address) set_fixmap_nocache(FIX_APIC_BASE, address); if (boot_cpu_physical_apicid == -1U) { - boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); + boot_cpu_physical_apicid = read_apic_id(); #ifdef CONFIG_X86_32 apic_version[boot_cpu_physical_apicid] = GET_APIC_VERSION(apic_read(APIC_LVR)); diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 7413354..47ff978 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1230,7 +1230,7 @@ void __init init_apic_mappings(void) * default configuration (or the MP table is broken). */ if (boot_cpu_physical_apicid == -1U) - boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); + boot_cpu_physical_apicid = read_apic_id(); } @@ -1270,7 +1270,7 @@ int __init APIC_init_uniprocessor(void) * might be zero if read from MP tables. Get it from LAPIC. */ #ifdef CONFIG_CRASH_DUMP - boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); + boot_cpu_physical_apicid = read_apic_id(); #endif physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index dd05010..c75f58a 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1060,7 +1060,7 @@ void __init early_init_lapic_mapping(void) * Fetch the APIC ID of the BSP in case we have a * default configuration (or the MP table is broken). */ - boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); + boot_cpu_physical_apicid = read_apic_id(); } /** @@ -1069,7 +1069,7 @@ void __init early_init_lapic_mapping(void) void __init init_apic_mappings(void) { if (x2apic) { - boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); + boot_cpu_physical_apicid = read_apic_id(); return; } @@ -1092,7 +1092,7 @@ void __init init_apic_mappings(void) * Fetch the APIC ID of the BSP in case we have a * default configuration (or the MP table is broken). */ - boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); + boot_cpu_physical_apicid = read_apic_id(); } /* diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 7355868..7dac2f2 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -101,7 +101,7 @@ static unsigned int read_xapic_id(void) { unsigned int id; - id = GET_XAPIC_ID(apic_read(APIC_ID)); + id = GET_APIC_ID(apic_read(APIC_ID)); return id; } diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index c50adb8..382208d1 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -1501,7 +1501,7 @@ void /*__init*/ print_local_APIC(void *dummy) smp_processor_id(), hard_smp_processor_id()); v = apic_read(APIC_ID); printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, - GET_APIC_ID(read_apic_id())); + GET_APIC_ID(v)); v = apic_read(APIC_LVR); printk(KERN_INFO "... APIC VERSION: %08x\n", v); ver = GET_APIC_VERSION(v); @@ -1709,8 +1709,7 @@ void disable_IO_APIC(void) entry.dest_mode = 0; /* Physical */ entry.delivery_mode = dest_ExtINT; /* ExtInt */ entry.vector = 0; - entry.dest.physical.physical_dest = - GET_APIC_ID(read_apic_id()); + entry.dest.physical.physical_dest = read_apic_id(); /* * Add it to the IO-APIC irq-routing table: diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 877aa2e..2db0f98 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -1246,7 +1246,7 @@ void __apicdebuginit print_local_APIC(void * dummy) printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", smp_processor_id(), hard_smp_processor_id()); v = apic_read(APIC_ID); - printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(read_apic_id())); + printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, read_apic_id()); v = apic_read(APIC_LVR); printk(KERN_INFO "... APIC VERSION: %08x\n", v); ver = GET_APIC_VERSION(v); @@ -1439,7 +1439,7 @@ void disable_IO_APIC(void) entry.dest_mode = 0; /* Physical */ entry.delivery_mode = dest_ExtINT; /* ExtInt */ entry.vector = 0; - entry.dest = GET_APIC_ID(read_apic_id()); + entry.dest = read_apic_id(); /* * Add it to the IO-APIC irq-routing table: diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 0c43e1f..6cd002f 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -211,7 +211,7 @@ static void __cpuinit smp_callin(void) /* * (This works even if the APIC is not enabled.) */ - phys_id = GET_APIC_ID(read_apic_id()); + phys_id = read_apic_id(); cpuid = smp_processor_id(); if (cpu_isset(cpuid, cpu_callin_map)) { panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__, @@ -1157,9 +1157,9 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) } preempt_disable(); - if (GET_APIC_ID(read_apic_id()) != boot_cpu_physical_apicid) { + if (read_apic_id() != boot_cpu_physical_apicid) { panic("Boot APIC ID in local APIC unexpected (%d vs %d)", - GET_APIC_ID(read_apic_id()), boot_cpu_physical_apicid); + read_apic_id(), boot_cpu_physical_apicid); /* Or can we switch back to PIC here? */ } preempt_enable(); diff --git a/include/asm-x86/mach-default/mach_apic.h b/include/asm-x86/mach-default/mach_apic.h index e06d239..925b797 100644 --- a/include/asm-x86/mach-default/mach_apic.h +++ b/include/asm-x86/mach-default/mach_apic.h @@ -56,7 +56,7 @@ static inline void init_apic_ldr(void) static inline int apic_id_registered(void) { - return physid_isset(GET_APIC_ID(read_apic_id()), phys_cpu_present_map); + return physid_isset(read_apic_id(), phys_cpu_present_map); } static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) diff --git a/include/asm-x86/mach-default/mach_apicdef.h b/include/asm-x86/mach-default/mach_apicdef.h index 453b58a..3e1be6c 100644 --- a/include/asm-x86/mach-default/mach_apicdef.h +++ b/include/asm-x86/mach-default/mach_apicdef.h @@ -5,9 +5,8 @@ #ifdef CONFIG_X86_64 #define APIC_ID_MASK (0xFFu<<24) -#define GET_APIC_ID(x) (x) +#define GET_APIC_ID(x) (((x)>>24) & 0xFFu) #define SET_APIC_ID(x) (((x)<<24)) -#define GET_XAPIC_ID(x) (((x) >> 24) & 0xFFu) #else #define APIC_ID_MASK (0xF<<24) static inline unsigned get_apic_id(unsigned long x) diff --git a/include/asm-x86/mach-es7000/mach_apic.h b/include/asm-x86/mach-es7000/mach_apic.h index fbc8ad2..b3556ec 100644 --- a/include/asm-x86/mach-es7000/mach_apic.h +++ b/include/asm-x86/mach-es7000/mach_apic.h @@ -141,7 +141,7 @@ static inline void setup_portio_remap(void) extern unsigned int boot_cpu_physical_apicid; static inline int check_phys_apicid_present(int cpu_physical_apicid) { - boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); + boot_cpu_physical_apicid = read_apic_id(); return (1); } diff --git a/include/asm-x86/smp.h b/include/asm-x86/smp.h index d9d007d..3b43ca2 100644 --- a/include/asm-x86/smp.h +++ b/include/asm-x86/smp.h @@ -165,9 +165,14 @@ static inline int logical_smp_processor_id(void) return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR)); } +#include static inline unsigned int read_apic_id(void) { - return *(u32 *)(APIC_BASE + APIC_ID); + unsigned int reg; + + reg = *(u32 *)(APIC_BASE + APIC_ID); + + return GET_APIC_ID(reg); } #endif @@ -175,11 +180,11 @@ static inline unsigned int read_apic_id(void) # if defined(APIC_DEFINITION) || defined(CONFIG_X86_64) extern int hard_smp_processor_id(void); # else -# include +#include static inline int hard_smp_processor_id(void) { /* we don't want to mark this access volatile - bad code generation */ - return GET_APIC_ID(read_apic_id()); + return read_apic_id(); } # endif /* APIC_DEFINITION */ -- cgit v0.10.2 From f910a9dc7c865896815e2a95fe33363e9522f277 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 12 Jul 2008 01:01:20 -0700 Subject: x86: make 64bit have get_apic_id generalize the x2apic code some more. let read_apic_id become a macro (later on a function/inline) GET_APIC_ID(apic_read(APIC_ID)) +#define read_apic_id() (GET_APIC_ID(apic_read(APIC_ID))) instead of this weird construct: -#define read_apic_id (genapic->read_apic_id) Signed-off-by: Yinghai Lu Cc: Suresh Siddha Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 7dac2f2..2c973cb 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -97,11 +97,27 @@ static void flat_send_IPI_all(int vector) __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); } +static unsigned int get_apic_id(unsigned long x) +{ + unsigned int id; + + id = (((x)>>24) & 0xFFu); + return id; +} + +static unsigned long set_apic_id(unsigned int id) +{ + unsigned long x; + + x = ((id & 0xFFu)<<24); + return x; +} + static unsigned int read_xapic_id(void) { unsigned int id; - id = GET_APIC_ID(apic_read(APIC_ID)); + id = get_apic_id(apic_read(APIC_ID)); return id; } @@ -134,7 +150,9 @@ struct genapic apic_flat = { .send_IPI_self = apic_send_IPI_self, .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, .phys_pkg_id = phys_pkg_id, - .read_apic_id = read_xapic_id, + .get_apic_id = get_apic_id, + .set_apic_id = set_apic_id, + .apic_id_mask = (0xFFu<<24), }; /* @@ -200,5 +218,7 @@ struct genapic apic_physflat = { .send_IPI_self = apic_send_IPI_self, .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, .phys_pkg_id = phys_pkg_id, - .read_apic_id = read_xapic_id, + .get_apic_id = get_apic_id, + .set_apic_id = set_apic_id, + .apic_id_mask = (0xFFu<<24), }; diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index ed0fded..40bc014 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -94,6 +94,22 @@ static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) return BAD_APICID; } +static unsigned int get_apic_id(unsigned long x) +{ + unsigned int id; + + id = x; + return id; +} + +static unsigned long set_apic_id(unsigned int id) +{ + unsigned long x; + + x = id; + return x; +} + static unsigned int x2apic_read_id(void) { return apic_read(APIC_ID); @@ -131,5 +147,7 @@ struct genapic apic_x2apic_cluster = { .send_IPI_self = x2apic_send_IPI_self, .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, .phys_pkg_id = phys_pkg_id, - .read_apic_id = x2apic_read_id, + .get_apic_id = get_apic_id, + .set_apic_id = set_apic_id, + .apic_id_mask = (0xFFFFFFFFu), }; diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index 3c70b9d..2f3c6ca 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -84,6 +84,22 @@ static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) return BAD_APICID; } +static unsigned int get_apic_id(unsigned long x) +{ + unsigned int id; + + id = x; + return id; +} + +static unsigned long set_apic_id(unsigned int id) +{ + unsigned long x; + + x = id; + return x; +} + static unsigned int x2apic_read_id(void) { return apic_read(APIC_ID); @@ -118,5 +134,7 @@ struct genapic apic_x2apic_phys = { .send_IPI_self = x2apic_send_IPI_self, .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, .phys_pkg_id = phys_pkg_id, - .read_apic_id = x2apic_read_id, + .get_apic_id = get_apic_id, + .set_apic_id = set_apic_id, + .apic_id_mask = (0xFFFFFFFFu), }; diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index c915f75..3ca29cd 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -139,16 +139,31 @@ static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask) return BAD_APICID; } -static unsigned int uv_read_apic_id(void) +static unsigned int get_apic_id(unsigned long x) { unsigned int id; WARN_ON(preemptible() && num_online_cpus() > 1); - id = apic_read(APIC_ID) | __get_cpu_var(x2apic_extra_bits); + id = x | __get_cpu_var(x2apic_extra_bits); return id; } +static long set_apic_id(unsigned int id) +{ + unsigned long x; + + /* maskout x2apic_extra_bits ? */ + x = id; + return x; +} + +static unsigned int uv_read_apic_id(void) +{ + + return get_apic_id(apic_read(APIC_ID)); +} + static unsigned int phys_pkg_id(int index_msb) { return uv_read_apic_id() >> index_msb; @@ -175,7 +190,9 @@ struct genapic apic_x2apic_uv_x = { /* ZZZ.send_IPI_self = uv_send_IPI_self, */ .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, .phys_pkg_id = phys_pkg_id, /* Fixme ZZZ */ - .read_apic_id = uv_read_apic_id, + .get_apic_id = get_apic_id, + .set_apic_id = set_apic_id, + .apic_id_mask = (0xFFFFFFFFu), }; static __cpuinit void set_x2apic_extra_bits(int pnode) diff --git a/include/asm-x86/genapic_64.h b/include/asm-x86/genapic_64.h index 122b924..8ff2589 100644 --- a/include/asm-x86/genapic_64.h +++ b/include/asm-x86/genapic_64.h @@ -28,7 +28,9 @@ struct genapic { /* */ unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask); unsigned int (*phys_pkg_id)(int index_msb); - unsigned int (*read_apic_id)(void); + unsigned int (*get_apic_id)(unsigned long x); + unsigned long (*set_apic_id)(unsigned int id); + unsigned long apic_id_mask; }; extern struct genapic *genapic; diff --git a/include/asm-x86/mach-default/mach_apic.h b/include/asm-x86/mach-default/mach_apic.h index 925b797..3d2b455 100644 --- a/include/asm-x86/mach-default/mach_apic.h +++ b/include/asm-x86/mach-default/mach_apic.h @@ -30,7 +30,7 @@ static inline cpumask_t target_cpus(void) #define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid) #define phys_pkg_id (genapic->phys_pkg_id) #define vector_allocation_domain (genapic->vector_allocation_domain) -#define read_apic_id (genapic->read_apic_id) +#define read_apic_id() (GET_APIC_ID(apic_read(APIC_ID))) #define send_IPI_self (genapic->send_IPI_self) extern void setup_apic_routing(void); #else diff --git a/include/asm-x86/mach-default/mach_apicdef.h b/include/asm-x86/mach-default/mach_apicdef.h index 3e1be6c..a55518a 100644 --- a/include/asm-x86/mach-default/mach_apicdef.h +++ b/include/asm-x86/mach-default/mach_apicdef.h @@ -4,9 +4,9 @@ #include #ifdef CONFIG_X86_64 -#define APIC_ID_MASK (0xFFu<<24) -#define GET_APIC_ID(x) (((x)>>24) & 0xFFu) -#define SET_APIC_ID(x) (((x)<<24)) +#define APIC_ID_MASK (genapic->apic_id_mask) +#define GET_APIC_ID(x) (genapic->get_apic_id(x)) +#define SET_APIC_ID(x) (genapic->set_apic_id(x)) #else #define APIC_ID_MASK (0xF<<24) static inline unsigned get_apic_id(unsigned long x) -- cgit v0.10.2 From 404571525db92bafeddb0cf9febb21aac6613dca Mon Sep 17 00:00:00 2001 From: Jesper Nilsson Date: Mon, 14 Jul 2008 08:59:48 +0200 Subject: [CRIS] Rename boot-linkscripts and fix the path to them. This makes the CRIS-port directories follow the same naming convention as the rest of the kernel. Signed-off-by: Jesper Nilsson diff --git a/arch/cris/arch-v10/boot/compressed/Makefile b/arch/cris/arch-v10/boot/compressed/Makefile index 08d943c..6fe0ffa 100644 --- a/arch/cris/arch-v10/boot/compressed/Makefile +++ b/arch/cris/arch-v10/boot/compressed/Makefile @@ -4,7 +4,7 @@ asflags-y += $(LINUXINCLUDE) ccflags-y += -O2 $(LINUXINCLUDE) -ldflags-y += -T $(srctree)/$(obj)/decompress.ld +ldflags-y += -T $(srctree)/$(src)/decompress.lds OBJECTS = $(obj)/head.o $(obj)/misc.o OBJCOPYFLAGS = -O binary --remove-section=.bss diff --git a/arch/cris/arch-v10/boot/compressed/decompress.ld b/arch/cris/arch-v10/boot/compressed/decompress.ld deleted file mode 100644 index e80f459..0000000 --- a/arch/cris/arch-v10/boot/compressed/decompress.ld +++ /dev/null @@ -1,30 +0,0 @@ -/* OUTPUT_FORMAT(elf32-us-cris) */ -OUTPUT_FORMAT(elf32-cris) - -MEMORY - { - dram : ORIGIN = 0x40700000, - LENGTH = 0x00100000 - } - -SECTIONS -{ - .text : - { - _stext = . ; - *(.text) - *(.rodata) - *(.rodata.*) - _etext = . ; - } > dram - .data : - { - *(.data) - _edata = . ; - } > dram - .bss : - { - *(.bss) - _end = ALIGN( 0x10 ) ; - } > dram -} diff --git a/arch/cris/arch-v10/boot/compressed/decompress.lds b/arch/cris/arch-v10/boot/compressed/decompress.lds new file mode 100644 index 0000000..e80f459 --- /dev/null +++ b/arch/cris/arch-v10/boot/compressed/decompress.lds @@ -0,0 +1,30 @@ +/* OUTPUT_FORMAT(elf32-us-cris) */ +OUTPUT_FORMAT(elf32-cris) + +MEMORY + { + dram : ORIGIN = 0x40700000, + LENGTH = 0x00100000 + } + +SECTIONS +{ + .text : + { + _stext = . ; + *(.text) + *(.rodata) + *(.rodata.*) + _etext = . ; + } > dram + .data : + { + *(.data) + _edata = . ; + } > dram + .bss : + { + *(.bss) + _end = ALIGN( 0x10 ) ; + } > dram +} diff --git a/arch/cris/arch-v10/boot/rescue/Makefile b/arch/cris/arch-v10/boot/rescue/Makefile index 07688da..82ab59b 100644 --- a/arch/cris/arch-v10/boot/rescue/Makefile +++ b/arch/cris/arch-v10/boot/rescue/Makefile @@ -4,7 +4,7 @@ ccflags-y += -O2 $(LINUXINCLUDE) asflags-y += $(LINUXINCLUDE) -ldflags-y += -T $(srctree)/$(obj)/rescue.ld +ldflags-y += -T $(srctree)/$(src)/rescue.lds OBJCOPYFLAGS = -O binary --remove-section=.bss obj-$(CONFIG_ETRAX_AXISFLASHMAP) = head.o OBJECT := $(obj)/head.o diff --git a/arch/cris/arch-v10/boot/rescue/rescue.ld b/arch/cris/arch-v10/boot/rescue/rescue.ld deleted file mode 100644 index 0b52a94..0000000 --- a/arch/cris/arch-v10/boot/rescue/rescue.ld +++ /dev/null @@ -1,20 +0,0 @@ -MEMORY - { - flash : ORIGIN = 0x00000000, - LENGTH = 0x00100000 - } - -SECTIONS -{ - .text : - { - stext = . ; - *(.text) - etext = . ; - } > flash - .data : - { - *(.data) - edata = . ; - } > flash -} diff --git a/arch/cris/arch-v10/boot/rescue/rescue.lds b/arch/cris/arch-v10/boot/rescue/rescue.lds new file mode 100644 index 0000000..0b52a94 --- /dev/null +++ b/arch/cris/arch-v10/boot/rescue/rescue.lds @@ -0,0 +1,20 @@ +MEMORY + { + flash : ORIGIN = 0x00000000, + LENGTH = 0x00100000 + } + +SECTIONS +{ + .text : + { + stext = . ; + *(.text) + etext = . ; + } > flash + .data : + { + *(.data) + edata = . ; + } > flash +} diff --git a/arch/cris/arch-v32/boot/compressed/Makefile b/arch/cris/arch-v32/boot/compressed/Makefile index d6335f2..5a1b31c 100644 --- a/arch/cris/arch-v32/boot/compressed/Makefile +++ b/arch/cris/arch-v32/boot/compressed/Makefile @@ -4,7 +4,7 @@ asflags-y += -I $(srctree)/include/asm/mach/ -I $(srctree)/include/asm/arch ccflags-y += -O2 -I $(srctree)/include/asm/mach/ -I $(srctree)/include/asm/arch -ldflags-y += -T $(srctree)/$(obj)/decompress.ld +ldflags-y += -T $(srctree)/$(src)/decompress.lds OBJECTS = $(obj)/head.o $(obj)/misc.o OBJCOPYFLAGS = -O binary --remove-section=.bss diff --git a/arch/cris/arch-v32/boot/compressed/decompress.ld b/arch/cris/arch-v32/boot/compressed/decompress.ld deleted file mode 100644 index 3c837fe..0000000 --- a/arch/cris/arch-v32/boot/compressed/decompress.ld +++ /dev/null @@ -1,30 +0,0 @@ -/*#OUTPUT_FORMAT(elf32-us-cris) */ -OUTPUT_ARCH (crisv32) - -MEMORY - { - dram : ORIGIN = 0x40700000, - LENGTH = 0x00100000 - } - -SECTIONS -{ - .text : - { - _stext = . ; - *(.text) - *(.rodata) - *(.rodata.*) - _etext = . ; - } > dram - .data : - { - *(.data) - _edata = . ; - } > dram - .bss : - { - *(.bss) - _end = ALIGN( 0x10 ) ; - } > dram -} diff --git a/arch/cris/arch-v32/boot/compressed/decompress.lds b/arch/cris/arch-v32/boot/compressed/decompress.lds new file mode 100644 index 0000000..3c837fe --- /dev/null +++ b/arch/cris/arch-v32/boot/compressed/decompress.lds @@ -0,0 +1,30 @@ +/*#OUTPUT_FORMAT(elf32-us-cris) */ +OUTPUT_ARCH (crisv32) + +MEMORY + { + dram : ORIGIN = 0x40700000, + LENGTH = 0x00100000 + } + +SECTIONS +{ + .text : + { + _stext = . ; + *(.text) + *(.rodata) + *(.rodata.*) + _etext = . ; + } > dram + .data : + { + *(.data) + _edata = . ; + } > dram + .bss : + { + *(.bss) + _end = ALIGN( 0x10 ) ; + } > dram +} diff --git a/arch/cris/arch-v32/boot/rescue/Makefile b/arch/cris/arch-v32/boot/rescue/Makefile index 44ae0ad..566aac6 100644 --- a/arch/cris/arch-v32/boot/rescue/Makefile +++ b/arch/cris/arch-v32/boot/rescue/Makefile @@ -7,7 +7,7 @@ ccflags-y += -O2 -I $(srctree)/include/asm/arch/mach/ \ -I $(srctree)/include/asm/arch asflags-y += -I $(srctree)/include/asm/arch/mach/ -I $(srctree)/include/asm/arch LD = gcc-cris -mlinux -march=v32 -nostdlib -ldflags-y += -T $(srctree)/$(obj)/rescue.ld +ldflags-y += -T $(srctree)/$(src)/rescue.lds LDPOSTFLAGS = -lgcc OBJCOPYFLAGS = -O binary --remove-section=.bss obj-$(CONFIG_ETRAX_AXISFLASHMAP) = head.o diff --git a/arch/cris/arch-v32/boot/rescue/rescue.ld b/arch/cris/arch-v32/boot/rescue/rescue.ld deleted file mode 100644 index 8ac646b..0000000 --- a/arch/cris/arch-v32/boot/rescue/rescue.ld +++ /dev/null @@ -1,43 +0,0 @@ -/*#OUTPUT_FORMAT(elf32-us-cris) */ -OUTPUT_ARCH (crisv32) -/* Now that NAND support has been stripped, this file could be simplified, - * but it doesn't do any harm on the other hand so why bother. */ - -MEMORY - { - bootblk : ORIGIN = 0x38000000, - LENGTH = 0x00004000 - intmem : ORIGIN = 0x38004000, - LENGTH = 0x00005000 - } - -SECTIONS -{ - .text : - { - _stext = . ; - *(.text) - *(.init.text) - *(.rodata) - *(.rodata.*) - _etext = . ; - } > bootblk - .data : - { - *(.data) - _edata = . ; - } > bootblk - .bss : - { - _bss = . ; - *(.bss) - _end = ALIGN( 0x10 ) ; - } > intmem - - /* Get rid of stuff from EXPORT_SYMBOL(foo). */ - /DISCARD/ : - { - *(__ksymtab_strings) - *(__ksymtab) - } -} diff --git a/arch/cris/arch-v32/boot/rescue/rescue.lds b/arch/cris/arch-v32/boot/rescue/rescue.lds new file mode 100644 index 0000000..8ac646b --- /dev/null +++ b/arch/cris/arch-v32/boot/rescue/rescue.lds @@ -0,0 +1,43 @@ +/*#OUTPUT_FORMAT(elf32-us-cris) */ +OUTPUT_ARCH (crisv32) +/* Now that NAND support has been stripped, this file could be simplified, + * but it doesn't do any harm on the other hand so why bother. */ + +MEMORY + { + bootblk : ORIGIN = 0x38000000, + LENGTH = 0x00004000 + intmem : ORIGIN = 0x38004000, + LENGTH = 0x00005000 + } + +SECTIONS +{ + .text : + { + _stext = . ; + *(.text) + *(.init.text) + *(.rodata) + *(.rodata.*) + _etext = . ; + } > bootblk + .data : + { + *(.data) + _edata = . ; + } > bootblk + .bss : + { + _bss = . ; + *(.bss) + _end = ALIGN( 0x10 ) ; + } > intmem + + /* Get rid of stuff from EXPORT_SYMBOL(foo). */ + /DISCARD/ : + { + *(__ksymtab_strings) + *(__ksymtab) + } +} -- cgit v0.10.2 From 94a8c3c2437c8946f1b6c8e0b2c560a7db8ed3c6 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 13 Jul 2008 22:19:35 -0700 Subject: x86: let 32bit use apic_ops too - fix fix for pv - clean up the namespace there too. Signed-off-by: Yinghai Lu Cc: Suresh Siddha Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 4f29ff8..e0f1391 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -360,11 +360,6 @@ struct pv_cpu_ops pv_cpu_ops = { struct pv_apic_ops pv_apic_ops = { #ifdef CONFIG_X86_LOCAL_APIC -#ifndef CONFIG_X86_64 - .apic_write = native_apic_mem_write, - .apic_write_atomic = native_apic_mem_write_atomic, - .apic_read = native_apic_mem_read, -#endif .setup_boot_clock = setup_boot_APIC_clock, .setup_secondary_clock = setup_secondary_APIC_clock, .startup_ipi_hook = paravirt_nop, diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index b153460..cf30743 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -676,6 +676,50 @@ static inline int __init probe_vmi_rom(void) return 0; } +#ifdef CONFIG_X86_LOCAL_APIC +static u32 vmi_apic_read(u32 reg) +{ + return 0; +} + +static void vmi_apic_write(u32 reg, u32 val) +{ + /* Warn to see if there's any stray references */ + WARN_ON(1); +} + +static u64 vmi_apic_icr_read(void) +{ + return 0; +} + +static void vmi_apic_icr_write(u32 low, u32 id) +{ + /* Warn to see if there's any stray references */ + WARN_ON(1); +} + +static void vmi_apic_wait_icr_idle(void) +{ + return; +} + +static u32 vmi_safe_apic_wait_icr_idle(void) +{ + return 0; +} + +static struct apic_ops vmi_basic_apic_ops = { + .read = vmi_apic_read, + .write = vmi_apic_write, + .write_atomic = vmi_apic_write, + .icr_read = vmi_apic_icr_read, + .icr_write = vmi_apic_icr_write, + .wait_icr_idle = vmi_apic_wait_icr_idle, + .safe_wait_icr_idle = vmi_safe_apic_wait_icr_idle, +}; +#endif + /* * VMI setup common to all processors */ @@ -904,9 +948,10 @@ static inline int __init activate_vmi(void) #endif #ifdef CONFIG_X86_LOCAL_APIC - para_fill(pv_apic_ops.apic_read, APICRead); - para_fill(pv_apic_ops.apic_write, APICWrite); - para_fill(pv_apic_ops.apic_write_atomic, APICWrite); + para_fill(vmi_basic_apic_ops.read, APICRead); + para_fill(vmi_basic_apic_ops.write, APICWrite); + para_fill(vmi_basic_apic_ops.write_atomic, APICWrite); + apic_ops = &vmi_basic_apic_ops; #endif /* diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 54e2556..d11dda7e 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -559,7 +559,6 @@ static void xen_apic_write(u32 reg, u32 val) WARN_ON(1); } -#ifdef CONFIG_X86_64 static u64 xen_apic_icr_read(void) { return 0; @@ -576,6 +575,11 @@ static void xen_apic_wait_icr_idle(void) return; } +static u32 xen_safe_apic_wait_icr_idle(void) +{ + return 0; +} + static struct apic_ops xen_basic_apic_ops = { .read = xen_apic_read, .write = xen_apic_write, @@ -583,9 +587,8 @@ static struct apic_ops xen_basic_apic_ops = { .icr_read = xen_apic_icr_read, .icr_write = xen_apic_icr_write, .wait_icr_idle = xen_apic_wait_icr_idle, - .safe_wait_icr_idle = xen_apic_wait_icr_idle, + .safe_wait_icr_idle = xen_safe_apic_wait_icr_idle, }; -#endif #endif @@ -1159,11 +1162,6 @@ static const struct pv_irq_ops xen_irq_ops __initdata = { static const struct pv_apic_ops xen_apic_ops __initdata = { #ifdef CONFIG_X86_LOCAL_APIC -#ifndef CONFIG_X86_64 - .apic_write = xen_apic_write, - .apic_write_atomic = xen_apic_write, - .apic_read = xen_apic_read, -#endif .setup_boot_clock = paravirt_nop, .setup_secondary_clock = paravirt_nop, .startup_ipi_hook = paravirt_nop, @@ -1322,9 +1320,10 @@ asmlinkage void __init xen_start_kernel(void) pv_irq_ops = xen_irq_ops; pv_apic_ops = xen_apic_ops; pv_mmu_ops = xen_mmu_ops; -#ifdef CONFIG_X86_64 + +#ifdef CONFIG_X86_LOCAL_APIC /* - * for 64bit, set up the basic apic ops aswell. + * set up the basic apic ops. */ apic_ops = &xen_basic_apic_ops; #endif diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h index 5e34d26..08f89e3 100644 --- a/include/asm-x86/paravirt.h +++ b/include/asm-x86/paravirt.h @@ -200,15 +200,6 @@ struct pv_irq_ops { struct pv_apic_ops { #ifdef CONFIG_X86_LOCAL_APIC -#ifndef CONFIG_X86_64 - /* - * Direct APIC operations, principally for VMI. Ideally - * these shouldn't be in this interface. - */ - void (*apic_write)(u32 reg, u32 v); - void (*apic_write_atomic)(u32 reg, u32 v); - u32 (*apic_read)(u32 reg); -#endif void (*setup_boot_clock)(void); void (*setup_secondary_clock)(void); @@ -890,26 +881,6 @@ static inline void slow_down_io(void) } #ifdef CONFIG_X86_LOCAL_APIC -/* - * Basic functions accessing APICs. - */ -#ifndef CONFIG_X86_64 -static inline void apic_write(u32 reg, u32 v) -{ - PVOP_VCALL2(pv_apic_ops.apic_write, reg, v); -} - -static inline void apic_write_atomic(u32 reg, u32 v) -{ - PVOP_VCALL2(pv_apic_ops.apic_write_atomic, reg, v); -} - -static inline u32 apic_read(u32 reg) -{ - return PVOP_CALL1(unsigned long, pv_apic_ops.apic_read, reg); -} -#endif - static inline void setup_boot_clock(void) { PVOP_VCALL0(pv_apic_ops.setup_boot_clock); -- cgit v0.10.2 From 3cac97cbb14aed00d83eb33d4613b0fe3aaea863 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Sun, 6 Jul 2008 17:23:55 +0800 Subject: rcu classic: simplify the next pending batch use a batch number(rcp->pending) instead of a flag(rcp->next_pending) rcu_start_batch() need to change this flag, so mb()s is needed for memory-access safe. but(after this patch applied) rcu_start_batch() do not change this batch number(rcp->pending), rcp->pending is managed by __rcu_process_callbacks only, and troublesome mb()s are eliminated. And codes look simpler and clearer. Signed-off-by: Lai Jiangshan Cc: "Paul E. McKenney" Cc: Dipankar Sarma Cc: Gautham Shenoy Cc: Dhaval Giani Cc: Peter Zijlstra Signed-off-by: Ingo Molnar diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h index 8c77490..c847e59 100644 --- a/include/linux/rcuclassic.h +++ b/include/linux/rcuclassic.h @@ -45,7 +45,7 @@ struct rcu_ctrlblk { long cur; /* Current batch number. */ long completed; /* Number of the last completed batch */ - int next_pending; /* Is the next batch already waiting? */ + long pending; /* Number of the last pending batch */ int signaled; diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c index 16eeeaa..03726eb 100644 --- a/kernel/rcuclassic.c +++ b/kernel/rcuclassic.c @@ -60,12 +60,14 @@ EXPORT_SYMBOL_GPL(rcu_lock_map); static struct rcu_ctrlblk rcu_ctrlblk = { .cur = -300, .completed = -300, + .pending = -300, .lock = __SPIN_LOCK_UNLOCKED(&rcu_ctrlblk.lock), .cpumask = CPU_MASK_NONE, }; static struct rcu_ctrlblk rcu_bh_ctrlblk = { .cur = -300, .completed = -300, + .pending = -300, .lock = __SPIN_LOCK_UNLOCKED(&rcu_bh_ctrlblk.lock), .cpumask = CPU_MASK_NONE, }; @@ -276,14 +278,8 @@ static void rcu_do_batch(struct rcu_data *rdp) */ static void rcu_start_batch(struct rcu_ctrlblk *rcp) { - if (rcp->next_pending && + if (rcp->cur != rcp->pending && rcp->completed == rcp->cur) { - rcp->next_pending = 0; - /* - * next_pending == 0 must be visible in - * __rcu_process_callbacks() before it can see new value of cur. - */ - smp_wmb(); rcp->cur++; /* @@ -441,16 +437,14 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp, /* determine batch number */ rdp->batch = rcp->cur + 1; - /* see the comment and corresponding wmb() in - * the rcu_start_batch() - */ - smp_rmb(); - if (!rcp->next_pending) { + if (rcu_batch_after(rdp->batch, rcp->pending)) { /* and start it/schedule start if it's a new batch */ spin_lock(&rcp->lock); - rcp->next_pending = 1; - rcu_start_batch(rcp); + if (rcu_batch_after(rdp->batch, rcp->pending)) { + rcp->pending = rdp->batch; + rcu_start_batch(rcp); + } spin_unlock(&rcp->lock); } } -- cgit v0.10.2 From 5127bed588a2f8f3a1f732de2a8a190b7df5dce3 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Sun, 6 Jul 2008 17:23:59 +0800 Subject: rcu classic: new algorithm for callbacks-processing(v2) This is v2, it's a little deference from v1 that I had send to lkml. use ACCESS_ONCE use rcu_batch_after/rcu_batch_before for batch # comparison. rcutorture test result: (hotplugs: do cpu-online/offline once per second) No CONFIG_NO_HZ: OK, 12hours No CONFIG_NO_HZ, hotplugs: OK, 12hours CONFIG_NO_HZ=y: OK, 24hours CONFIG_NO_HZ=y, hotplugs: Failed. (Failed also without my patch applied, exactly the same bug occurred, http://lkml.org/lkml/2008/7/3/24) v1's email thread: http://lkml.org/lkml/2008/6/2/539 v1's description: The code/algorithm of the implement of current callbacks-processing is very efficient and technical. But when I studied it and I found a disadvantage: In multi-CPU systems, when a new RCU callback is being queued(call_rcu[_bh]), this callback will be invoked after the grace period for the batch with batch number = rcp->cur+2 has completed very very likely in current implement. Actually, this callback can be invoked after the grace period for the batch with batch number = rcp->cur+1 has completed. The delay of invocation means that latency of synchronize_rcu() is extended. But more important thing is that the callbacks usually free memory, and these works are delayed too! it's necessary for reclaimer to free memory as soon as possible when left memory is few. A very simple way can solve this problem: a field(struct rcu_head::batch) is added to record the batch number for the RCU callback. And when a new RCU callback is being queued, we determine the batch number for this callback(head->batch = rcp->cur+1) and we move this callback to rdp->donelist if we find that head->batch <= rcp->completed when we process callbacks. This simple way reduces the wait time for invocation a lot. (about 2.5Grace Period -> 1.5Grace Period in average in multi-CPU systems) This is my algorithm. But I do not add any field for struct rcu_head in my implement. We just need to memorize the last 2 batches and their batch number, because these 2 batches include all entries that for whom the grace period hasn't completed. So we use a special linked-list rather than add a field. Please see the comment of struct rcu_data. Signed-off-by: Lai Jiangshan Cc: "Paul E. McKenney" Cc: Dipankar Sarma Cc: Gautham Shenoy Cc: Dhaval Giani Cc: Peter Zijlstra Signed-off-by: Ingo Molnar diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h index c847e59..04c7281 100644 --- a/include/linux/rcuclassic.h +++ b/include/linux/rcuclassic.h @@ -66,11 +66,7 @@ static inline int rcu_batch_after(long a, long b) return (a - b) > 0; } -/* - * Per-CPU data for Read-Copy UPdate. - * nxtlist - new callbacks are added here - * curlist - current batch for which quiescent cycle started if any - */ +/* Per-CPU data for Read-Copy UPdate. */ struct rcu_data { /* 1) quiescent state handling : */ long quiescbatch; /* Batch # for grace period */ @@ -78,12 +74,24 @@ struct rcu_data { int qs_pending; /* core waits for quiesc state */ /* 2) batch handling */ - long batch; /* Batch # for current RCU batch */ + /* + * if nxtlist is not NULL, then: + * batch: + * The batch # for the last entry of nxtlist + * [*nxttail[1], NULL = *nxttail[2]): + * Entries that batch # <= batch + * [*nxttail[0], *nxttail[1]): + * Entries that batch # <= batch - 1 + * [nxtlist, *nxttail[0]): + * Entries that batch # <= batch - 2 + * The grace period for these entries has completed, and + * the other grace-period-completed entries may be moved + * here temporarily in rcu_process_callbacks(). + */ + long batch; struct rcu_head *nxtlist; - struct rcu_head **nxttail; + struct rcu_head **nxttail[3]; long qlen; /* # of queued callbacks */ - struct rcu_head *curlist; - struct rcu_head **curtail; struct rcu_head *donelist; struct rcu_head **donetail; long blimit; /* Upper limit on a processed batch */ diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c index 03726eb..d3553ee 100644 --- a/kernel/rcuclassic.c +++ b/kernel/rcuclassic.c @@ -120,6 +120,43 @@ static inline void force_quiescent_state(struct rcu_data *rdp, } #endif +static void __call_rcu(struct rcu_head *head, struct rcu_ctrlblk *rcp, + struct rcu_data *rdp) +{ + long batch; + smp_mb(); /* reads the most recently updated value of rcu->cur. */ + + /* + * Determine the batch number of this callback. + * + * Using ACCESS_ONCE to avoid the following error when gcc eliminates + * local variable "batch" and emits codes like this: + * 1) rdp->batch = rcp->cur + 1 # gets old value + * ...... + * 2)rcu_batch_after(rcp->cur + 1, rdp->batch) # gets new value + * then [*nxttail[0], *nxttail[1]) may contain callbacks + * that batch# = rdp->batch, see the comment of struct rcu_data. + */ + batch = ACCESS_ONCE(rcp->cur) + 1; + + if (rdp->nxtlist && rcu_batch_after(batch, rdp->batch)) { + /* process callbacks */ + rdp->nxttail[0] = rdp->nxttail[1]; + rdp->nxttail[1] = rdp->nxttail[2]; + if (rcu_batch_after(batch - 1, rdp->batch)) + rdp->nxttail[0] = rdp->nxttail[2]; + } + + rdp->batch = batch; + *rdp->nxttail[2] = head; + rdp->nxttail[2] = &head->next; + + if (unlikely(++rdp->qlen > qhimark)) { + rdp->blimit = INT_MAX; + force_quiescent_state(rdp, &rcu_ctrlblk); + } +} + /** * call_rcu - Queue an RCU callback for invocation after a grace period. * @head: structure to be used for queueing the RCU updates. @@ -135,18 +172,11 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) { unsigned long flags; - struct rcu_data *rdp; head->func = func; head->next = NULL; local_irq_save(flags); - rdp = &__get_cpu_var(rcu_data); - *rdp->nxttail = head; - rdp->nxttail = &head->next; - if (unlikely(++rdp->qlen > qhimark)) { - rdp->blimit = INT_MAX; - force_quiescent_state(rdp, &rcu_ctrlblk); - } + __call_rcu(head, &rcu_ctrlblk, &__get_cpu_var(rcu_data)); local_irq_restore(flags); } EXPORT_SYMBOL_GPL(call_rcu); @@ -171,20 +201,11 @@ void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) { unsigned long flags; - struct rcu_data *rdp; head->func = func; head->next = NULL; local_irq_save(flags); - rdp = &__get_cpu_var(rcu_bh_data); - *rdp->nxttail = head; - rdp->nxttail = &head->next; - - if (unlikely(++rdp->qlen > qhimark)) { - rdp->blimit = INT_MAX; - force_quiescent_state(rdp, &rcu_bh_ctrlblk); - } - + __call_rcu(head, &rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data)); local_irq_restore(flags); } EXPORT_SYMBOL_GPL(call_rcu_bh); @@ -213,12 +234,6 @@ EXPORT_SYMBOL_GPL(rcu_batches_completed_bh); static inline void raise_rcu_softirq(void) { raise_softirq(RCU_SOFTIRQ); - /* - * The smp_mb() here is required to ensure that this cpu's - * __rcu_process_callbacks() reads the most recently updated - * value of rcu->cur. - */ - smp_mb(); } /* @@ -360,13 +375,15 @@ static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp, * which is dead and hence not processing interrupts. */ static void rcu_move_batch(struct rcu_data *this_rdp, struct rcu_head *list, - struct rcu_head **tail) + struct rcu_head **tail, long batch) { - local_irq_disable(); - *this_rdp->nxttail = list; - if (list) - this_rdp->nxttail = tail; - local_irq_enable(); + if (list) { + local_irq_disable(); + this_rdp->batch = batch; + *this_rdp->nxttail[2] = list; + this_rdp->nxttail[2] = tail; + local_irq_enable(); + } } static void __rcu_offline_cpu(struct rcu_data *this_rdp, @@ -380,9 +397,9 @@ static void __rcu_offline_cpu(struct rcu_data *this_rdp, if (rcp->cur != rcp->completed) cpu_quiet(rdp->cpu, rcp); spin_unlock_bh(&rcp->lock); - rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail); - rcu_move_batch(this_rdp, rdp->curlist, rdp->curtail); - rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail); + /* spin_lock implies smp_mb() */ + rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail, rcp->cur + 1); + rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail[2], rcp->cur + 1); local_irq_disable(); this_rdp->qlen += rdp->qlen; @@ -416,27 +433,37 @@ static void rcu_offline_cpu(int cpu) static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp, struct rcu_data *rdp) { - if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch)) { - *rdp->donetail = rdp->curlist; - rdp->donetail = rdp->curtail; - rdp->curlist = NULL; - rdp->curtail = &rdp->curlist; - } - - if (rdp->nxtlist && !rdp->curlist) { + if (rdp->nxtlist) { local_irq_disable(); - rdp->curlist = rdp->nxtlist; - rdp->curtail = rdp->nxttail; - rdp->nxtlist = NULL; - rdp->nxttail = &rdp->nxtlist; - local_irq_enable(); /* - * start the next batch of callbacks + * move the other grace-period-completed entries to + * [rdp->nxtlist, *rdp->nxttail[0]) temporarily + */ + if (!rcu_batch_before(rcp->completed, rdp->batch)) + rdp->nxttail[0] = rdp->nxttail[1] = rdp->nxttail[2]; + else if (!rcu_batch_before(rcp->completed, rdp->batch - 1)) + rdp->nxttail[0] = rdp->nxttail[1]; + + /* + * the grace period for entries in + * [rdp->nxtlist, *rdp->nxttail[0]) has completed and + * move these entries to donelist */ + if (rdp->nxttail[0] != &rdp->nxtlist) { + *rdp->donetail = rdp->nxtlist; + rdp->donetail = rdp->nxttail[0]; + rdp->nxtlist = *rdp->nxttail[0]; + *rdp->donetail = NULL; + + if (rdp->nxttail[1] == rdp->nxttail[0]) + rdp->nxttail[1] = &rdp->nxtlist; + if (rdp->nxttail[2] == rdp->nxttail[0]) + rdp->nxttail[2] = &rdp->nxtlist; + rdp->nxttail[0] = &rdp->nxtlist; + } - /* determine batch number */ - rdp->batch = rcp->cur + 1; + local_irq_enable(); if (rcu_batch_after(rdp->batch, rcp->pending)) { /* and start it/schedule start if it's a new batch */ @@ -462,15 +489,26 @@ static void rcu_process_callbacks(struct softirq_action *unused) static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp) { - /* This cpu has pending rcu entries and the grace period - * for them has completed. - */ - if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch)) - return 1; + if (rdp->nxtlist) { + /* + * This cpu has pending rcu entries and the grace period + * for them has completed. + */ + if (!rcu_batch_before(rcp->completed, rdp->batch)) + return 1; + if (!rcu_batch_before(rcp->completed, rdp->batch - 1) && + rdp->nxttail[0] != rdp->nxttail[1]) + return 1; + if (rdp->nxttail[0] != &rdp->nxtlist) + return 1; - /* This cpu has no pending entries, but there are new entries */ - if (!rdp->curlist && rdp->nxtlist) - return 1; + /* + * This cpu has pending rcu entries and the new batch + * for then hasn't been started nor scheduled start + */ + if (rcu_batch_after(rdp->batch, rcp->pending)) + return 1; + } /* This cpu has finished callbacks to invoke */ if (rdp->donelist) @@ -506,7 +544,7 @@ int rcu_needs_cpu(int cpu) struct rcu_data *rdp = &per_cpu(rcu_data, cpu); struct rcu_data *rdp_bh = &per_cpu(rcu_bh_data, cpu); - return (!!rdp->curlist || !!rdp_bh->curlist || rcu_pending(cpu)); + return !!rdp->nxtlist || !!rdp_bh->nxtlist || rcu_pending(cpu); } void rcu_check_callbacks(int cpu, int user) @@ -553,8 +591,7 @@ static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp, struct rcu_data *rdp) { memset(rdp, 0, sizeof(*rdp)); - rdp->curtail = &rdp->curlist; - rdp->nxttail = &rdp->nxtlist; + rdp->nxttail[0] = rdp->nxttail[1] = rdp->nxttail[2] = &rdp->nxtlist; rdp->donetail = &rdp->donelist; rdp->quiescbatch = rcp->completed; rdp->qs_pending = 0; -- cgit v0.10.2 From 9a8f0e6b5dfe3b4f330fc82b16a4000f5688fce8 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Fri, 18 Jul 2008 09:59:40 -0700 Subject: x86: let 32bit use apic_ops too - fix Fix VMI apic_ops. Signed-off-by: Suresh Siddha Acked-by: Yinghai Lu Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 47ff978..cb54d9e 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -145,6 +145,11 @@ static int modern_apic(void) return lapic_get_version() >= 0x14; } +/* + * Paravirt kernels also might be using these below ops. So we still + * use generic apic_read()/apic_write(), which might be pointing to different + * ops in PARAVIRT case. + */ void xapic_wait_icr_idle(void) { while (apic_read(APIC_ICR) & APIC_ICR_BUSY) diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index cf30743..2370828 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -676,50 +676,6 @@ static inline int __init probe_vmi_rom(void) return 0; } -#ifdef CONFIG_X86_LOCAL_APIC -static u32 vmi_apic_read(u32 reg) -{ - return 0; -} - -static void vmi_apic_write(u32 reg, u32 val) -{ - /* Warn to see if there's any stray references */ - WARN_ON(1); -} - -static u64 vmi_apic_icr_read(void) -{ - return 0; -} - -static void vmi_apic_icr_write(u32 low, u32 id) -{ - /* Warn to see if there's any stray references */ - WARN_ON(1); -} - -static void vmi_apic_wait_icr_idle(void) -{ - return; -} - -static u32 vmi_safe_apic_wait_icr_idle(void) -{ - return 0; -} - -static struct apic_ops vmi_basic_apic_ops = { - .read = vmi_apic_read, - .write = vmi_apic_write, - .write_atomic = vmi_apic_write, - .icr_read = vmi_apic_icr_read, - .icr_write = vmi_apic_icr_write, - .wait_icr_idle = vmi_apic_wait_icr_idle, - .safe_wait_icr_idle = vmi_safe_apic_wait_icr_idle, -}; -#endif - /* * VMI setup common to all processors */ @@ -948,10 +904,9 @@ static inline int __init activate_vmi(void) #endif #ifdef CONFIG_X86_LOCAL_APIC - para_fill(vmi_basic_apic_ops.read, APICRead); - para_fill(vmi_basic_apic_ops.write, APICWrite); - para_fill(vmi_basic_apic_ops.write_atomic, APICWrite); - apic_ops = &vmi_basic_apic_ops; + para_fill(apic_ops->read, APICRead); + para_fill(apic_ops->write, APICWrite); + para_fill(apic_ops->write_atomic, APICWrite); #endif /* -- cgit v0.10.2 From 511d9d34183662aada3890883e860b151d707e22 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 14 Jul 2008 09:49:14 -0700 Subject: x86: apic_ops for lguest apic_ops for lguest. Signed-off-by: Suresh Siddha Acked-by: Yinghai Lu Signed-off-by: Ingo Molnar diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 0c45df2..675ee7a 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -791,6 +791,37 @@ static u32 lguest_apic_read(u32 reg) { return 0; } + +static u64 lguest_apic_icr_read(void) +{ + return 0; +} + +static void lguest_apic_icr_write(u32 low, u32 id) +{ + /* Warn to see if there's any stray references */ + WARN_ON(1); +} + +static void lguest_apic_wait_icr_idle(void) +{ + return; +} + +static u32 lguest_apic_safe_wait_icr_idle(void) +{ + return 0; +} + +static struct apic_ops lguest_basic_apic_ops = { + .read = lguest_apic_read, + .write = lguest_apic_write, + .write_atomic = lguest_apic_write, + .icr_read = lguest_apic_icr_read, + .icr_write = lguest_apic_icr_write, + .wait_icr_idle = lguest_apic_wait_icr_idle, + .safe_wait_icr_idle = lguest_apic_safe_wait_icr_idle, +}; #endif /* STOP! Until an interrupt comes in. */ @@ -990,9 +1021,7 @@ __init void lguest_init(void) #ifdef CONFIG_X86_LOCAL_APIC /* apic read/write intercepts */ - pv_apic_ops.apic_write = lguest_apic_write; - pv_apic_ops.apic_write_atomic = lguest_apic_write; - pv_apic_ops.apic_read = lguest_apic_read; + apic_ops = &lguest_basic_apic_ops; #endif /* time operations */ -- cgit v0.10.2 From f586bf7df9acc26b68b0feefc0dd32fa63516d3a Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Fri, 18 Jul 2008 15:58:35 -0700 Subject: x86: APIC: Remove apic_write_around(); use alternatives, merge fix On Fri, Jul 18, 2008 at 02:03:48PM -0700, Ingo Molnar wrote: > > * Yinghai Lu wrote: > > > > git merge tip/x86/x2apic > > CONFLICT (content): Merge conflict in arch/x86/kernel/Makefile > > CONFLICT (content): Merge conflict in arch/x86/kernel/paravirt.c > > CONFLICT (content): Merge conflict in arch/x86/kernel/smpboot.c > > CONFLICT (content): Merge conflict in arch/x86/kernel/vmi_32.c > > CONFLICT (content): Merge conflict in arch/x86/xen/enlighten.c > > CONFLICT (content): Merge conflict in include/asm-x86/apic.h > > CONFLICT (content): Merge conflict in include/asm-x86/paravirt.h > > that's due to the changes in tip/x86/apic and in tip/x86/uv. > > ok, i've just merged x86/apic into x86/x2apic and x86/uv as well, and > pushed out the result. > > Note: it's a first raw merge and completely untested. It will now merge > cleanly into tip/master. There are probably a few details missing. Ingo, thanks for doing this. While I was testing my merge changes, you posted yours... anyhow we need this piece, which is missing from your merge. Signed-off-by: Suresh Siddha Cc: Yinghai Lu Cc: "Maciej W. Rozycki" Cc: Jeremy Fitzhardinge Cc: Zachary Amsden Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index dcb897f..8728f54 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -174,8 +174,8 @@ u32 safe_xapic_wait_icr_idle(void) void xapic_icr_write(u32 low, u32 id) { - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(id)); - apic_write_around(APIC_ICR, low); + apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id)); + apic_write(APIC_ICR, low); } u64 xapic_icr_read(void) @@ -191,7 +191,6 @@ u64 xapic_icr_read(void) static struct apic_ops xapic_ops = { .read = native_apic_mem_read, .write = native_apic_mem_write, - .write_atomic = native_apic_mem_write_atomic, .icr_read = xapic_icr_read, .icr_write = xapic_icr_write, .wait_icr_idle = xapic_wait_icr_idle, diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 46e6124..a850bc6 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -167,7 +167,6 @@ u64 xapic_icr_read(void) static struct apic_ops xapic_ops = { .read = native_apic_mem_read, .write = native_apic_mem_write, - .write_atomic = native_apic_mem_write_atomic, .icr_read = xapic_icr_read, .icr_write = xapic_icr_write, .wait_icr_idle = xapic_wait_icr_idle, @@ -206,7 +205,6 @@ u64 x2apic_icr_read(void) static struct apic_ops x2apic_ops = { .read = native_apic_msr_read, .write = native_apic_msr_write, - .write_atomic = native_apic_msr_write, .icr_read = x2apic_icr_read, .icr_write = x2apic_icr_write, .wait_icr_idle = x2apic_wait_icr_idle, -- cgit v0.10.2 From caf43bf7c6a55e89b6df5179df434d67e24aa32e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 20 Jul 2008 14:06:50 +0200 Subject: x86, xen: fix apic_ops build on UP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix: arch/x86/xen/enlighten.c:615: error: variable ‘xen_basic_apic_ops’ has initializer but incomplete type arch/x86/xen/enlighten.c:616: error: unknown field ‘read’ specified in initializer [...] Signed-off-by: Ingo Molnar diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 008b7b6..e4d1459 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -35,6 +35,7 @@ #include #include +#include #include #include #include -- cgit v0.10.2 From 7be42004065ce4df193aeef5befd26805267d0d9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 20 Jul 2008 17:04:57 +0200 Subject: x86, lguest: fix apic_ops build on UP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix: arch/x86/lguest/boot.c:816: error: variable ‘lguest_basic_apic_ops’ has initializer but incomplete type arch/x86/lguest/boot.c:817: error: unknown field ‘read’ specified in initializer [...] Signed-off-by: Ingo Molnar diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 35c4349..756fc48 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -55,6 +55,7 @@ #include #include #include +#include #include #include #include -- cgit v0.10.2 From 1b9b89e7f163336ad84200b66a17284dbf26aced Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 21 Jul 2008 22:08:21 -0700 Subject: x86: add apic probe for genapic 64bit, v2 introducing an APIC handling probing abstraction: static struct genapic *apic_probe[] __initdata = { &apic_x2apic_uv_x, &apic_x2apic_phys, &apic_x2apic_cluster, &apic_physflat, NULL, }; This way we can remove UV, x2apic specific code from genapic_64.c and move them to their specific genapic files. [ v2: fix compiling when CONFIG_ACPI is not set ] Signed-off-by: Yinghai Lu Cc: Jack Steiner Cc: Suresh Siddha Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 3940d81..b3ba969 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -16,62 +16,37 @@ #include #include #include -#include #include #include #include -#ifdef CONFIG_ACPI -#include -#endif - -DEFINE_PER_CPU(int, x2apic_extra_bits); +extern struct genapic apic_flat; +extern struct genapic apic_physflat; +extern struct genapic apic_x2xpic_uv_x; +extern struct genapic apic_x2apic_phys; +extern struct genapic apic_x2apic_cluster; struct genapic __read_mostly *genapic = &apic_flat; -static int x2apic_phys = 0; - -static int set_x2apic_phys_mode(char *arg) -{ - x2apic_phys = 1; - return 0; -} -early_param("x2apic_phys", set_x2apic_phys_mode); - -static enum uv_system_type uv_system_type; +static struct genapic *apic_probe[] __initdata = { + &apic_x2apic_uv_x, + &apic_x2apic_phys, + &apic_x2apic_cluster, + &apic_physflat, + NULL, +}; /* * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. */ void __init setup_apic_routing(void) { - if (uv_system_type == UV_NON_UNIQUE_APIC) - genapic = &apic_x2apic_uv_x; - else if (cpu_has_x2apic && intr_remapping_enabled) { - if (x2apic_phys) - genapic = &apic_x2apic_phys; - else - genapic = &apic_x2apic_cluster; - } else -#ifdef CONFIG_ACPI - /* - * Quirk: some x86_64 machines can only use physical APIC mode - * regardless of how many processors are present (x86_64 ES7000 - * is an example). - */ - if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID && - (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) - genapic = &apic_physflat; - else -#endif - - if (max_physical_apicid < 8) - genapic = &apic_flat; - else - genapic = &apic_physflat; - - printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name); + if (genapic == &apic_flat) { + if (max_physical_apicid >= 8) + genapic = &apic_physflat; + printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name); + } } /* Same for both flat and physical. */ @@ -83,23 +58,15 @@ void apic_send_IPI_self(int vector) int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) { - if (!strcmp(oem_id, "SGI")) { - if (!strcmp(oem_table_id, "UVL")) - uv_system_type = UV_LEGACY_APIC; - else if (!strcmp(oem_table_id, "UVX")) - uv_system_type = UV_X2APIC; - else if (!strcmp(oem_table_id, "UVH")) - uv_system_type = UV_NON_UNIQUE_APIC; + int i; + + for (i = 0; apic_probe[i]; ++i) { + if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) { + genapic = apic_probe[i]; + printk(KERN_INFO "Setting APIC routing to %s.\n", + genapic->name); + return 1; + } } return 0; } - -enum uv_system_type get_uv_system_type(void) -{ - return uv_system_type; -} - -int is_uv_system(void) -{ - return uv_system_type != UV_NONE; -} diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 2c973cb..1740b83 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -21,6 +21,15 @@ #include #include +#ifdef CONFIG_ACPI +#include +#endif + +static int __init flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + return 1; +} + static cpumask_t flat_target_cpus(void) { return cpu_online_map; @@ -138,6 +147,7 @@ static unsigned int phys_pkg_id(int index_msb) struct genapic apic_flat = { .name = "flat", + .acpi_madt_oem_check = flat_acpi_madt_oem_check, .int_delivery_mode = dest_LowestPrio, .int_dest_mode = (APIC_DEST_LOGICAL != 0), .target_cpus = flat_target_cpus, @@ -160,6 +170,21 @@ struct genapic apic_flat = { * We cannot use logical delivery in this case because the mask * overflows, so use physical mode. */ +static int __init physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ +#ifdef CONFIG_ACPI + /* + * Quirk: some x86_64 machines can only use physical APIC mode + * regardless of how many processors are present (x86_64 ES7000 + * is an example). + */ + if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID && + (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) + return 1; +#endif + + return 0; +} static cpumask_t physflat_target_cpus(void) { @@ -206,6 +231,7 @@ static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask) struct genapic apic_physflat = { .name = "physical flat", + .acpi_madt_oem_check = physflat_acpi_madt_oem_check, .int_delivery_mode = dest_Fixed, .int_dest_mode = (APIC_DEST_PHYSICAL != 0), .target_cpus = physflat_target_cpus, diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index 40bc014..ef3f318 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -4,12 +4,22 @@ #include #include #include +#include + #include #include #include DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid); +static int __init x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + if (cpu_has_x2apic && intr_remapping_enabled) + return 1; + + return 0; +} + /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ static cpumask_t x2apic_target_cpus(void) @@ -135,6 +145,7 @@ static void init_x2apic_ldr(void) struct genapic apic_x2apic_cluster = { .name = "cluster x2apic", + .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, .int_delivery_mode = dest_LowestPrio, .int_dest_mode = (APIC_DEST_LOGICAL != 0), .target_cpus = x2apic_target_cpus, diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index 2f3c6ca..f35a3bf 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -4,10 +4,30 @@ #include #include #include +#include + #include #include #include +DEFINE_PER_CPU(int, x2apic_extra_bits); + +static int x2apic_phys; + +static int set_x2apic_phys_mode(char *arg) +{ + x2apic_phys = 1; + return 0; +} +early_param("x2apic_phys", set_x2apic_phys_mode); + +static int __init x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + if (cpu_has_x2apic && intr_remapping_enabled && x2apic_phys) + return 1; + + return 0; +} /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ @@ -122,6 +142,7 @@ void init_x2apic_ldr(void) struct genapic apic_x2apic_phys = { .name = "physical x2apic", + .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, .int_delivery_mode = dest_Fixed, .int_dest_mode = (APIC_DEST_PHYSICAL != 0), .target_cpus = x2apic_target_cpus, diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index a8e5cb4..a882bc3 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -27,6 +27,33 @@ #include #include +static enum uv_system_type uv_system_type; + +static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + if (!strcmp(oem_id, "SGI")) { + if (!strcmp(oem_table_id, "UVL")) + uv_system_type = UV_LEGACY_APIC; + else if (!strcmp(oem_table_id, "UVX")) + uv_system_type = UV_X2APIC; + else if (!strcmp(oem_table_id, "UVH")) { + uv_system_type = UV_NON_UNIQUE_APIC; + return 1; + } + } + return 0; +} + +enum uv_system_type get_uv_system_type(void) +{ + return uv_system_type; +} + +int is_uv_system(void) +{ + return uv_system_type != UV_NONE; +} + DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info); @@ -153,7 +180,7 @@ static unsigned int get_apic_id(unsigned long x) return id; } -static long set_apic_id(unsigned int id) +static unsigned long set_apic_id(unsigned int id) { unsigned long x; @@ -182,6 +209,7 @@ static void uv_send_IPI_self(int vector) struct genapic apic_x2apic_uv_x = { .name = "UV large system", + .acpi_madt_oem_check = uv_acpi_madt_oem_check, .int_delivery_mode = dest_Fixed, .int_dest_mode = (APIC_DEST_PHYSICAL != 0), .target_cpus = uv_target_cpus, @@ -433,3 +461,5 @@ void __cpuinit uv_cpu_init(void) if (get_uv_system_type() == UV_NON_UNIQUE_APIC) set_x2apic_extra_bits(uv_hub_info->pnode); } + + diff --git a/include/asm-x86/genapic_64.h b/include/asm-x86/genapic_64.h index 2871b3f..1e832e4 100644 --- a/include/asm-x86/genapic_64.h +++ b/include/asm-x86/genapic_64.h @@ -14,6 +14,7 @@ struct genapic { char *name; + int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); u32 int_delivery_mode; u32 int_dest_mode; int (*apic_id_registered)(void); -- cgit v0.10.2 From 026e2c05ef58ef413e2d52696f125d5ea1aa8bce Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 22 Jul 2008 11:58:14 +0200 Subject: x86, cyrix: debug Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 3fd7a67..db5868c 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -116,7 +116,7 @@ static void __cpuinit set_cx86_reorder(void) setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ /* Load/Store Serialize to mem access disable (=reorder it) */ - setCx86(CX86_PCR0, getCx86(CX86_PCR0) & ~0x80); + setCx86_old(CX86_PCR0, getCx86_old(CX86_PCR0) & ~0x80); /* set load/store serialize from 1GB to 4GB */ ccr3 |= 0xe0; setCx86(CX86_CCR3, ccr3); @@ -127,11 +127,11 @@ static void __cpuinit set_cx86_memwb(void) printk(KERN_INFO "Enable Memory-Write-back mode on Cyrix/NSC processor.\n"); /* CCR2 bit 2: unlock NW bit */ - setCx86(CX86_CCR2, getCx86(CX86_CCR2) & ~0x04); + setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) & ~0x04); /* set 'Not Write-through' */ write_cr0(read_cr0() | X86_CR0_NW); /* CCR2 bit 2: lock NW bit and set WT1 */ - setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14); + setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) | 0x14); } static void __cpuinit set_cx86_inc(void) @@ -144,10 +144,10 @@ static void __cpuinit set_cx86_inc(void) setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ /* PCR1 -- Performance Control */ /* Incrementor on, whatever that is */ - setCx86(CX86_PCR1, getCx86(CX86_PCR1) | 0x02); + setCx86_old(CX86_PCR1, getCx86_old(CX86_PCR1) | 0x02); /* PCR0 -- Performance Control */ /* Incrementor Margin 10 */ - setCx86(CX86_PCR0, getCx86(CX86_PCR0) | 0x04); + setCx86_old(CX86_PCR0, getCx86_old(CX86_PCR0) | 0x04); setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ } @@ -162,14 +162,14 @@ static void __cpuinit geode_configure(void) local_irq_save(flags); /* Suspend on halt power saving and enable #SUSP pin */ - setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88); + setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) | 0x88); ccr3 = getCx86(CX86_CCR3); setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ /* FPU fast, DTE cache, Mem bypass */ - setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x38); + setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x38); setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ set_cx86_memwb(); @@ -286,7 +286,7 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) /* GXm supports extended cpuid levels 'ala' AMD */ if (c->cpuid_level == 2) { /* Enable cxMMX extensions (GX1 Datasheet 54) */ - setCx86(CX86_CCR7, getCx86(CX86_CCR7) | 1); + setCx86_old(CX86_CCR7, getCx86_old(CX86_CCR7) | 1); /* * GXm : 0x30 ... 0x5f GXm datasheet 51 @@ -309,7 +309,7 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) if (dir1 > 7) { dir0_msn++; /* M II */ /* Enable MMX extensions (App note 108) */ - setCx86(CX86_CCR7, getCx86(CX86_CCR7)|1); + setCx86_old(CX86_CCR7, getCx86_old(CX86_CCR7)|1); } else { c->coma_bug = 1; /* 6x86MX, it has the bug. */ } @@ -424,7 +424,7 @@ static void __cpuinit cyrix_identify(struct cpuinfo_x86 *c) local_irq_save(flags); ccr3 = getCx86(CX86_CCR3); setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ - setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x80); /* enable cpuid */ + setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x80); /* enable cpuid */ setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ local_irq_restore(flags); } diff --git a/include/asm-x86/processor-cyrix.h b/include/asm-x86/processor-cyrix.h index 97568ad..1198f2a 100644 --- a/include/asm-x86/processor-cyrix.h +++ b/include/asm-x86/processor-cyrix.h @@ -28,3 +28,11 @@ static inline void setCx86(u8 reg, u8 data) outb(reg, 0x22); outb(data, 0x23); } + +#define getCx86_old(reg) ({ outb((reg), 0x22); inb(0x23); }) + +#define setCx86_old(reg, data) do { \ + outb((reg), 0x22); \ + outb((data), 0x23); \ +} while (0) + -- cgit v0.10.2 From bbc1f698a508927d21324b57500e863f9bd562b9 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Date: Mon, 21 Jul 2008 21:34:13 +0530 Subject: x86: Introducing asm/syscalls.h Declaring arch-dependent syscalls for x86 architecture Signed-off-by: Jaswinder Singh diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 50e5e4a..1919143 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -14,6 +14,7 @@ #include #include #include +#include /* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */ static void set_bitmap(unsigned long *bitmap, unsigned int base, diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index a844957..c49ff2d 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -18,6 +18,7 @@ #include #include #include +#include #ifdef CONFIG_SMP static void flush_ldt(void *current_mm) diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 0c3927a..6490e49 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -55,6 +55,7 @@ #include #include #include +#include asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index e8a8e1b..c78090d 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -51,6 +51,7 @@ #include #include #include +#include asmlinkage extern void ret_from_fork(void); diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 07faaa5..5cede10 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "sigframe.h" diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index bf87684..b95a0a6 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "sigframe.h" #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c index 7066cb8..1884a8d 100644 --- a/arch/x86/kernel/sys_i386_32.c +++ b/arch/x86/kernel/sys_i386_32.c @@ -22,6 +22,8 @@ #include #include +#include + asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff) diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index 3b360ef..c9288c8 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -16,6 +16,7 @@ #include #include +#include asmlinkage long sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long off) diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c index ab6bf37..6bb7b85 100644 --- a/arch/x86/kernel/tls.c +++ b/arch/x86/kernel/tls.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "tls.h" diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 38f566f..4eeb5cf 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -46,6 +46,7 @@ #include #include #include +#include /* * Known problems: diff --git a/include/asm-x86/syscalls.h b/include/asm-x86/syscalls.h new file mode 100644 index 0000000..170fcb1 --- /dev/null +++ b/include/asm-x86/syscalls.h @@ -0,0 +1,90 @@ +/* + * syscalls.h - Linux syscall interfaces (arch-specific) + * + * Copyright (c) 2008 Jaswinder Singh + * + * This file is released under the GPLv2. + * See the file COPYING for more details. + * + * Please do not call me directly, include linux/syscalls.h + */ + +#ifndef _ASM_X86_SYSCALLS_H +#define _ASM_X86_SYSCALLS_H + +#include +#include +#include +#include + +/* Common in X86_32 and X86_64 */ +/* kernel/ioport.c */ +asmlinkage long sys_ioperm(unsigned long, unsigned long, int); + +/* X86_32 only */ +#ifdef CONFIG_X86_32 +/* kernel/process_32.c */ +asmlinkage int sys_fork(struct pt_regs); +asmlinkage int sys_clone(struct pt_regs); +asmlinkage int sys_vfork(struct pt_regs); +asmlinkage int sys_execve(struct pt_regs); + +/* kernel/signal_32.c */ +asmlinkage int sys_sigsuspend(int, int, old_sigset_t); +asmlinkage int sys_sigaction(int, const struct old_sigaction __user *, + struct old_sigaction __user *); +asmlinkage int sys_sigaltstack(unsigned long); +asmlinkage unsigned long sys_sigreturn(unsigned long); +asmlinkage int sys_rt_sigreturn(unsigned long); + +/* kernel/ioport.c */ +asmlinkage long sys_iopl(unsigned long); + +/* kernel/ldt.c */ +asmlinkage int sys_modify_ldt(int, void __user *, unsigned long); + +/* kernel/sys_i386_32.c */ +asmlinkage long sys_mmap2(unsigned long, unsigned long, unsigned long, + unsigned long, unsigned long, unsigned long); +struct mmap_arg_struct; +asmlinkage int old_mmap(struct mmap_arg_struct __user *); +struct sel_arg_struct; +asmlinkage int old_select(struct sel_arg_struct __user *); +asmlinkage int sys_ipc(uint, int, int, int, void __user *, long); +struct old_utsname; +asmlinkage int sys_uname(struct old_utsname __user *); +struct oldold_utsname; +asmlinkage int sys_olduname(struct oldold_utsname __user *); + +/* kernel/tls.c */ +asmlinkage int sys_set_thread_area(struct user_desc __user *); +asmlinkage int sys_get_thread_area(struct user_desc __user *); + +#else /* CONFIG_X86_32 */ + +/* X86_64 only */ +/* kernel/process_64.c */ +asmlinkage long sys_fork(struct pt_regs *); +asmlinkage long sys_clone(unsigned long, unsigned long, + void __user *, void __user *, + struct pt_regs *); +asmlinkage long sys_vfork(struct pt_regs *); +asmlinkage long sys_execve(char __user *, char __user * __user *, + char __user * __user *, + struct pt_regs *); + +/* kernel/ioport.c */ +asmlinkage long sys_iopl(unsigned int, struct pt_regs *); + +/* kernel/signal_64.c */ +asmlinkage long sys_sigaltstack(const stack_t __user *, stack_t __user *, + struct pt_regs *); +asmlinkage long sys_rt_sigreturn(struct pt_regs *); + +/* kernel/sys_x86_64.c */ +asmlinkage long sys_mmap(unsigned long, unsigned long, unsigned long, + unsigned long, unsigned long, unsigned long); +asmlinkage long sys_uname(struct new_utsname __user *); + +#endif /* CONFIG_X86_32 */ +#endif /* _ASM_X86_SYSCALLS_H */ -- cgit v0.10.2 From fb26132b441e75d6ba9996efc29b42081aee0abd Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Date: Mon, 21 Jul 2008 21:36:40 +0530 Subject: x86: process_32.c declare cpu_number before they get used Moved DECLARE_PER_CPU(int, cpu_number) from CONFIG_X86_32_SMP to CONFIG_X86_32 because cpu_number is required for both. And include asm/smp.h in process_32.c Signed-off-by: Jaswinder Singh diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 6490e49..7218bcc 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -56,6 +56,7 @@ #include #include #include +#include asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); diff --git a/include/asm-x86/smp.h b/include/asm-x86/smp.h index 3c877f7..3a14806 100644 --- a/include/asm-x86/smp.h +++ b/include/asm-x86/smp.h @@ -34,6 +34,9 @@ extern cpumask_t cpu_initialized; DECLARE_PER_CPU(cpumask_t, cpu_sibling_map); DECLARE_PER_CPU(cpumask_t, cpu_core_map); DECLARE_PER_CPU(u16, cpu_llc_id); +#ifdef CONFIG_X86_32 +DECLARE_PER_CPU(int, cpu_number); +#endif DECLARE_EARLY_PER_CPU(u16, x86_cpu_to_apicid); DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid); @@ -142,7 +145,6 @@ extern unsigned disabled_cpus __cpuinitdata; * from the initial startup. We map APIC_BASE very early in page_setup(), * so this is correct in the x86 case. */ -DECLARE_PER_CPU(int, cpu_number); #define raw_smp_processor_id() (x86_read_percpu(cpu_number)) extern int safe_smp_processor_id(void); -- cgit v0.10.2 From b994b6c0332a5499b33880855dadad04d74cde54 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Date: Mon, 21 Jul 2008 21:37:52 +0530 Subject: x86: signal_XX.c declare do_notify_resume before they get used Signed-off-by: Jaswinder Singh diff --git a/include/asm-x86/signal.h b/include/asm-x86/signal.h index 6dac493..5e4a1b0 100644 --- a/include/asm-x86/signal.h +++ b/include/asm-x86/signal.h @@ -140,6 +140,9 @@ struct sigaction { struct k_sigaction { struct sigaction sa; }; + +extern void do_notify_resume(struct pt_regs *, void *, __u32); + # else /* __KERNEL__ */ /* Here we must cater to libcs that poke about in kernel headers. */ -- cgit v0.10.2 From cc0384917bf69079088701a0725c5fc6b554bf35 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Date: Mon, 21 Jul 2008 21:52:51 +0530 Subject: x86: time_XX.c declare functions before they get used Declare time_init() in asm-x86/time.h Also did cleanup in asm-x86/timer.h : timer_ack is only required for X86_32 int recalibrate_cpu_khz(void) is for X86_32 Signed-off-by: Jaswinder Singh diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c index ffe3c66..bbecf8b 100644 --- a/arch/x86/kernel/time_32.c +++ b/arch/x86/kernel/time_32.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "do_timer.h" diff --git a/include/asm-x86/time.h b/include/asm-x86/time.h index a17fa47..5f4fc3e 100644 --- a/include/asm-x86/time.h +++ b/include/asm-x86/time.h @@ -46,6 +46,8 @@ static inline int native_set_wallclock(unsigned long nowtime) #endif +extern void time_init(void); + #ifdef CONFIG_PARAVIRT #include #else /* !CONFIG_PARAVIRT */ diff --git a/include/asm-x86/timer.h b/include/asm-x86/timer.h index fb2a4dd..2a8a92d 100644 --- a/include/asm-x86/timer.h +++ b/include/asm-x86/timer.h @@ -9,9 +9,12 @@ unsigned long long native_sched_clock(void); unsigned long native_calibrate_tsc(void); +#ifdef CONFIG_X86_32 extern int timer_ack; -extern int no_timer_check; extern int recalibrate_cpu_khz(void); +#endif /* CONFIG_X86_32 */ + +extern int no_timer_check; #ifndef CONFIG_PARAVIRT #define calibrate_tsc() native_calibrate_tsc() -- cgit v0.10.2 From 5314d48ed54c1a0111c597d1510f77850a1b3232 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Date: Mon, 21 Jul 2008 22:12:23 +0530 Subject: x86: setup.c declare saved_video_mode before they get used Signed-off-by: Jaswinder Singh diff --git a/include/asm-x86/setup.h b/include/asm-x86/setup.h index a07c6f1..946f8f2 100644 --- a/include/asm-x86/setup.h +++ b/include/asm-x86/setup.h @@ -41,6 +41,7 @@ struct x86_quirks { }; extern struct x86_quirks *x86_quirks; +extern unsigned long saved_video_mode; #ifndef CONFIG_PARAVIRT #define paravirt_post_allocator_init() do {} while (0) -- cgit v0.10.2 From a7b7511ac1404eaf0e7b6c445a7c61b48ccfcf0b Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Date: Mon, 21 Jul 2008 22:19:29 +0530 Subject: x86: e820.c declare pci_mem_start before they get used Signed-off-by: Jaswinder Singh diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h index 16a31e2..213d1ed 100644 --- a/include/asm-x86/e820.h +++ b/include/asm-x86/e820.h @@ -64,6 +64,7 @@ struct e820map { extern struct e820map e820; extern struct e820map e820_saved; +extern unsigned long pci_mem_start; extern int e820_any_mapped(u64 start, u64 end, unsigned type); extern int e820_all_mapped(u64 start, u64 end, unsigned type); extern void e820_add_region(u64 start, u64 size, int type); -- cgit v0.10.2 From 9321b8cbbbf3a8dbd4748e3722facaeb8401bd13 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Date: Mon, 21 Jul 2008 22:24:29 +0530 Subject: x86: pci-dma.c declare iommu_bio_merge before they get used moved iommu_bio_merge from io_64.h to io.h because it is required for both. Signed-off-by: Jaswinder Singh diff --git a/include/asm-x86/io.h b/include/asm-x86/io.h index bf5d629..a145210 100644 --- a/include/asm-x86/io.h +++ b/include/asm-x86/io.h @@ -73,6 +73,8 @@ build_mmio_write(__writeq, "q", unsigned long, "r", ) #define writeq writeq #endif +extern int iommu_bio_merge; + #ifdef CONFIG_X86_32 # include "io_32.h" #else diff --git a/include/asm-x86/io_64.h b/include/asm-x86/io_64.h index ddd8058..1e27137 100644 --- a/include/asm-x86/io_64.h +++ b/include/asm-x86/io_64.h @@ -233,7 +233,6 @@ void memset_io(volatile void __iomem *a, int b, size_t c); #define flush_write_buffers() -extern int iommu_bio_merge; #define BIO_VMERGE_BOUNDARY iommu_bio_merge /* -- cgit v0.10.2 From 791b897ccd1a2c6c184b88ca6d1aaf053499c3df Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Date: Mon, 21 Jul 2008 22:28:22 +0530 Subject: x86: pci-nommu.c declare nommu_dma_ops before they get used Signed-off-by: Jaswinder Singh diff --git a/include/asm-x86/dma-mapping.h b/include/asm-x86/dma-mapping.h index c2ddd3d..f14b04d 100644 --- a/include/asm-x86/dma-mapping.h +++ b/include/asm-x86/dma-mapping.h @@ -57,6 +57,7 @@ struct dma_mapping_ops { }; extern const struct dma_mapping_ops *dma_ops; +extern const struct dma_mapping_ops nommu_dma_ops; static inline int dma_mapping_error(dma_addr_t dma_addr) { -- cgit v0.10.2 From 36454936c00c700ae86b5ff376d3c1c1a862c4f5 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Date: Mon, 21 Jul 2008 22:31:57 +0530 Subject: x86: i387.c declare dump_fpu() before they get used Signed-off-by: Jaswinder Singh diff --git a/include/asm-x86/i387.h b/include/asm-x86/i387.h index 37672f7..4878989 100644 --- a/include/asm-x86/i387.h +++ b/include/asm-x86/i387.h @@ -24,6 +24,7 @@ extern void mxcsr_feature_mask_init(void); extern int init_fpu(struct task_struct *child); extern asmlinkage void math_state_restore(void); extern void init_thread_xstate(void); +extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); extern user_regset_active_fn fpregs_active, xfpregs_active; extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get; -- cgit v0.10.2 From c1686aeaf0780055ffcd4b224b73d5ada77630e8 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Date: Mon, 21 Jul 2008 22:35:38 +0530 Subject: x86: ptrace.c declare functions before they get used Signed-off-by: Jaswinder Singh diff --git a/include/asm-x86/ptrace.h b/include/asm-x86/ptrace.h index 8a71db8..48c1d5a 100644 --- a/include/asm-x86/ptrace.h +++ b/include/asm-x86/ptrace.h @@ -148,6 +148,9 @@ extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, void signal_fault(struct pt_regs *regs, void __user *frame, char *where); #endif +extern long syscall_trace_enter(struct pt_regs *); +extern void syscall_trace_leave(struct pt_regs *); + static inline unsigned long regs_return_value(struct pt_regs *regs) { return regs->ax; -- cgit v0.10.2 From 1c6c727d9c12c84a612abe31b60948f06fc2ab2d Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Date: Mon, 21 Jul 2008 22:40:37 +0530 Subject: x86: proc.c declare cpuinfo_op before they get used Signed-off-by: Jaswinder Singh diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h index 15cb82a..93b6adc 100644 --- a/include/asm-x86/processor.h +++ b/include/asm-x86/processor.h @@ -140,6 +140,8 @@ DECLARE_PER_CPU(struct cpuinfo_x86, cpu_info); #define current_cpu_data boot_cpu_data #endif +extern const struct seq_operations cpuinfo_op; + static inline int hlt_works(int cpu) { #ifdef CONFIG_X86_32 -- cgit v0.10.2 From 8fd329a1ac696973ba5467c510302ae1248cc11a Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Date: Mon, 21 Jul 2008 22:54:56 +0530 Subject: x86: common.c declare idle_regs before they get used Signed-off-by: Jaswinder Singh diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h index 93b6adc..a2fb5a4 100644 --- a/include/asm-x86/processor.h +++ b/include/asm-x86/processor.h @@ -155,6 +155,8 @@ static inline int hlt_works(int cpu) extern void cpu_detect(struct cpuinfo_x86 *c); +extern struct pt_regs *idle_regs(struct pt_regs *); + extern void early_cpu_init(void); extern void identify_boot_cpu(void); extern void identify_secondary_cpu(struct cpuinfo_x86 *); -- cgit v0.10.2 From 1e84911c6c37fd1080ef07039e19c346628b31db Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Date: Mon, 21 Jul 2008 22:58:29 +0530 Subject: x86: mtrr/main.c declare range_state as static Signed-off-by: Jaswinder Singh diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 6f23969..b5ade28 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -729,7 +729,7 @@ struct var_mtrr_range_state { mtrr_type type; }; -struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; +static struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; static int __initdata debug_print; static int __init -- cgit v0.10.2 From df1be4372eae5f104b7fb4991bc4b35f00b11a11 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 22 Jul 2008 10:13:11 -0400 Subject: x64, apic: use generic apic_write() for ack_APIC_irq() I tested tip/master and found an issue (patch attached) for x2apic support. This is not because of the recent merges we had, but because of something(where we still access memory based interface after enabling x2apic mode) that slipped through my earlier tests. Probably it is a good idea to unmap the memory mapped interface, once we switch to x2apic mode. That will catch the issues much earlier. I will post another patch for this. ack_APIC_irq() is used at too many generic places (and not just during irq_chip handling!) to use the native_apic_mem_write(). For ex, this will break x2apic based systems. Fix ack_APIC_irq() to use the generic apic_write() even for 64-bit. Signed-off-by: Suresh Siddha Cc: suresh.b.siddha@intel.com Cc: yong.y.wang@linux.intel.com Signed-off-by: Ingo Molnar diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h index 5133991..502ca65 100644 --- a/include/asm-x86/apic.h +++ b/include/asm-x86/apic.h @@ -136,11 +136,7 @@ static inline void ack_APIC_irq(void) */ /* Docs say use 0 for future compatibility */ -#ifdef CONFIG_X86_32 apic_write(APIC_EOI, 0); -#else - native_apic_mem_write(APIC_EOI, 0); -#endif } extern int lapic_get_maxlvt(void); -- cgit v0.10.2 From a656c8efb40a8700046df20da2195f8aa39ce38a Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Tue, 22 Jul 2008 21:27:11 +0200 Subject: x86: fix spurious '#' in kvm header Signed-off-by: Vegard Nossum diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h index fdde0be..924e1bf 100644 --- a/include/asm-x86/kvm_host.h +++ b/include/asm-x86/kvm_host.h @@ -1,4 +1,4 @@ -#/* +/* * Kernel-based Virtual Machine driver for Linux * * This header defines architecture specific interfaces, x86 version -- cgit v0.10.2 From 77ef50a522717fa040636ee1017179ceba12ff62 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Wed, 18 Jun 2008 17:08:48 +0200 Subject: x86: consolidate header guards This patch is the result of an automatic script that consolidates the format of all the headers in include/asm-x86/. The format: 1. No leading underscore. Names with leading underscores are reserved. 2. Pathname components are separated by two underscores. So we can distinguish between mm_types.h and mm/types.h. 3. Everything except letters and numbers are turned into single underscores. Signed-off-by: Vegard Nossum diff --git a/include/asm-x86/a.out-core.h b/include/asm-x86/a.out-core.h index 714207a..f570576 100644 --- a/include/asm-x86/a.out-core.h +++ b/include/asm-x86/a.out-core.h @@ -9,8 +9,8 @@ * 2 of the Licence, or (at your option) any later version. */ -#ifndef _ASM_A_OUT_CORE_H -#define _ASM_A_OUT_CORE_H +#ifndef ASM_X86__A_OUT_CORE_H +#define ASM_X86__A_OUT_CORE_H #ifdef __KERNEL__ #ifdef CONFIG_X86_32 @@ -70,4 +70,4 @@ static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump) #endif /* CONFIG_X86_32 */ #endif /* __KERNEL__ */ -#endif /* _ASM_A_OUT_CORE_H */ +#endif /* ASM_X86__A_OUT_CORE_H */ diff --git a/include/asm-x86/a.out.h b/include/asm-x86/a.out.h index 4684f97..0948748 100644 --- a/include/asm-x86/a.out.h +++ b/include/asm-x86/a.out.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_A_OUT_H -#define _ASM_X86_A_OUT_H +#ifndef ASM_X86__A_OUT_H +#define ASM_X86__A_OUT_H struct exec { @@ -17,4 +17,4 @@ struct exec #define N_DRSIZE(a) ((a).a_drsize) #define N_SYMSIZE(a) ((a).a_syms) -#endif /* _ASM_X86_A_OUT_H */ +#endif /* ASM_X86__A_OUT_H */ diff --git a/include/asm-x86/acpi.h b/include/asm-x86/acpi.h index 635d764..bd76299 100644 --- a/include/asm-x86/acpi.h +++ b/include/asm-x86/acpi.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_ACPI_H -#define _ASM_X86_ACPI_H +#ifndef ASM_X86__ACPI_H +#define ASM_X86__ACPI_H /* * Copyright (C) 2001 Paul Diefenbaugh @@ -173,4 +173,4 @@ static inline void acpi_fake_nodes(const struct bootnode *fake_nodes, #define acpi_unlazy_tlb(x) leave_mm(x) -#endif /*__X86_ASM_ACPI_H*/ +#endif /* ASM_X86__ACPI_H */ diff --git a/include/asm-x86/agp.h b/include/asm-x86/agp.h index e4004a9..3617fd4 100644 --- a/include/asm-x86/agp.h +++ b/include/asm-x86/agp.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_AGP_H -#define _ASM_X86_AGP_H +#ifndef ASM_X86__AGP_H +#define ASM_X86__AGP_H #include #include @@ -32,4 +32,4 @@ #define free_gatt_pages(table, order) \ free_pages((unsigned long)(table), (order)) -#endif +#endif /* ASM_X86__AGP_H */ diff --git a/include/asm-x86/alternative.h b/include/asm-x86/alternative.h index f6aa18e..22d3c98 100644 --- a/include/asm-x86/alternative.h +++ b/include/asm-x86/alternative.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_ALTERNATIVE_H -#define _ASM_X86_ALTERNATIVE_H +#ifndef ASM_X86__ALTERNATIVE_H +#define ASM_X86__ALTERNATIVE_H #include #include @@ -180,4 +180,4 @@ extern void add_nops(void *insns, unsigned int len); extern void *text_poke(void *addr, const void *opcode, size_t len); extern void *text_poke_early(void *addr, const void *opcode, size_t len); -#endif /* _ASM_X86_ALTERNATIVE_H */ +#endif /* ASM_X86__ALTERNATIVE_H */ diff --git a/include/asm-x86/amd_iommu.h b/include/asm-x86/amd_iommu.h index 30a1204..783f43e 100644 --- a/include/asm-x86/amd_iommu.h +++ b/include/asm-x86/amd_iommu.h @@ -17,8 +17,8 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#ifndef _ASM_X86_AMD_IOMMU_H -#define _ASM_X86_AMD_IOMMU_H +#ifndef ASM_X86__AMD_IOMMU_H +#define ASM_X86__AMD_IOMMU_H #ifdef CONFIG_AMD_IOMMU extern int amd_iommu_init(void); @@ -29,4 +29,4 @@ static inline int amd_iommu_init(void) { return -ENODEV; } static inline void amd_iommu_detect(void) { } #endif -#endif +#endif /* ASM_X86__AMD_IOMMU_H */ diff --git a/include/asm-x86/amd_iommu_types.h b/include/asm-x86/amd_iommu_types.h index 22aa58c..e6b4d5b 100644 --- a/include/asm-x86/amd_iommu_types.h +++ b/include/asm-x86/amd_iommu_types.h @@ -17,8 +17,8 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#ifndef __AMD_IOMMU_TYPES_H__ -#define __AMD_IOMMU_TYPES_H__ +#ifndef ASM_X86__AMD_IOMMU_TYPES_H +#define ASM_X86__AMD_IOMMU_TYPES_H #include #include @@ -339,4 +339,4 @@ static inline u16 calc_devid(u8 bus, u8 devfn) return (((u16)bus) << 8) | devfn; } -#endif +#endif /* ASM_X86__AMD_IOMMU_TYPES_H */ diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h index 133c998..d0ace3d 100644 --- a/include/asm-x86/apic.h +++ b/include/asm-x86/apic.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_APIC_H -#define _ASM_X86_APIC_H +#ifndef ASM_X86__APIC_H +#define ASM_X86__APIC_H #include #include @@ -128,4 +128,4 @@ static inline void init_apic_mappings(void) { } #endif /* !CONFIG_X86_LOCAL_APIC */ -#endif /* __ASM_APIC_H */ +#endif /* ASM_X86__APIC_H */ diff --git a/include/asm-x86/apicdef.h b/include/asm-x86/apicdef.h index 6b9008c..c40687d 100644 --- a/include/asm-x86/apicdef.h +++ b/include/asm-x86/apicdef.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_APICDEF_H -#define _ASM_X86_APICDEF_H +#ifndef ASM_X86__APICDEF_H +#define ASM_X86__APICDEF_H /* * Constants for various Intel APICs. (local APIC, IOAPIC, etc.) @@ -411,4 +411,4 @@ struct local_apic { #else #define BAD_APICID 0xFFFFu #endif -#endif +#endif /* ASM_X86__APICDEF_H */ diff --git a/include/asm-x86/arch_hooks.h b/include/asm-x86/arch_hooks.h index 8411750..72adc3a 100644 --- a/include/asm-x86/arch_hooks.h +++ b/include/asm-x86/arch_hooks.h @@ -1,5 +1,5 @@ -#ifndef _ASM_ARCH_HOOKS_H -#define _ASM_ARCH_HOOKS_H +#ifndef ASM_X86__ARCH_HOOKS_H +#define ASM_X86__ARCH_HOOKS_H #include @@ -25,4 +25,4 @@ extern void pre_time_init_hook(void); extern void time_init_hook(void); extern void mca_nmi_hook(void); -#endif +#endif /* ASM_X86__ARCH_HOOKS_H */ diff --git a/include/asm-x86/asm.h b/include/asm-x86/asm.h index 9722032..2439ae4 100644 --- a/include/asm-x86/asm.h +++ b/include/asm-x86/asm.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_ASM_H -#define _ASM_X86_ASM_H +#ifndef ASM_X86__ASM_H +#define ASM_X86__ASM_H #ifdef __ASSEMBLY__ # define __ASM_FORM(x) x @@ -39,4 +39,4 @@ _ASM_PTR #from "," #to "\n" \ " .previous\n" -#endif /* _ASM_X86_ASM_H */ +#endif /* ASM_X86__ASM_H */ diff --git a/include/asm-x86/atomic_32.h b/include/asm-x86/atomic_32.h index 21a4825..14d3f0b 100644 --- a/include/asm-x86/atomic_32.h +++ b/include/asm-x86/atomic_32.h @@ -1,5 +1,5 @@ -#ifndef __ARCH_I386_ATOMIC__ -#define __ARCH_I386_ATOMIC__ +#ifndef ASM_X86__ATOMIC_32_H +#define ASM_X86__ATOMIC_32_H #include #include @@ -256,4 +256,4 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u) #define smp_mb__after_atomic_inc() barrier() #include -#endif +#endif /* ASM_X86__ATOMIC_32_H */ diff --git a/include/asm-x86/atomic_64.h b/include/asm-x86/atomic_64.h index a009519..ebbc753 100644 --- a/include/asm-x86/atomic_64.h +++ b/include/asm-x86/atomic_64.h @@ -1,5 +1,5 @@ -#ifndef __ARCH_X86_64_ATOMIC__ -#define __ARCH_X86_64_ATOMIC__ +#ifndef ASM_X86__ATOMIC_64_H +#define ASM_X86__ATOMIC_64_H #include #include @@ -470,4 +470,4 @@ static inline void atomic_or_long(unsigned long *v1, unsigned long v2) #define smp_mb__after_atomic_inc() barrier() #include -#endif +#endif /* ASM_X86__ATOMIC_64_H */ diff --git a/include/asm-x86/auxvec.h b/include/asm-x86/auxvec.h index 87f5e6d..12c7cac 100644 --- a/include/asm-x86/auxvec.h +++ b/include/asm-x86/auxvec.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_AUXVEC_H -#define _ASM_X86_AUXVEC_H +#ifndef ASM_X86__AUXVEC_H +#define ASM_X86__AUXVEC_H /* * Architecture-neutral AT_ values in 0-17, leave some room * for more of them, start the x86-specific ones at 32. @@ -9,4 +9,4 @@ #endif #define AT_SYSINFO_EHDR 33 -#endif +#endif /* ASM_X86__AUXVEC_H */ diff --git a/include/asm-x86/bios_ebda.h b/include/asm-x86/bios_ebda.h index 0033e50..ec42ed8 100644 --- a/include/asm-x86/bios_ebda.h +++ b/include/asm-x86/bios_ebda.h @@ -1,5 +1,5 @@ -#ifndef _MACH_BIOS_EBDA_H -#define _MACH_BIOS_EBDA_H +#ifndef ASM_X86__BIOS_EBDA_H +#define ASM_X86__BIOS_EBDA_H #include @@ -16,4 +16,4 @@ static inline unsigned int get_bios_ebda(void) void reserve_ebda_region(void); -#endif /* _MACH_BIOS_EBDA_H */ +#endif /* ASM_X86__BIOS_EBDA_H */ diff --git a/include/asm-x86/bitops.h b/include/asm-x86/bitops.h index cfb2b64..61989b9 100644 --- a/include/asm-x86/bitops.h +++ b/include/asm-x86/bitops.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_BITOPS_H -#define _ASM_X86_BITOPS_H +#ifndef ASM_X86__BITOPS_H +#define ASM_X86__BITOPS_H /* * Copyright 1992, Linus Torvalds. @@ -458,4 +458,4 @@ static inline void set_bit_string(unsigned long *bitmap, #include #endif /* __KERNEL__ */ -#endif /* _ASM_X86_BITOPS_H */ +#endif /* ASM_X86__BITOPS_H */ diff --git a/include/asm-x86/boot.h b/include/asm-x86/boot.h index 2faed7e..825de5d 100644 --- a/include/asm-x86/boot.h +++ b/include/asm-x86/boot.h @@ -1,5 +1,5 @@ -#ifndef _ASM_BOOT_H -#define _ASM_BOOT_H +#ifndef ASM_X86__BOOT_H +#define ASM_X86__BOOT_H /* Don't touch these, unless you really know what you're doing. */ #define DEF_INITSEG 0x9000 @@ -25,4 +25,4 @@ #define BOOT_STACK_SIZE 0x1000 #endif -#endif /* _ASM_BOOT_H */ +#endif /* ASM_X86__BOOT_H */ diff --git a/include/asm-x86/bootparam.h b/include/asm-x86/bootparam.h index ae22bdf..ccf027e 100644 --- a/include/asm-x86/bootparam.h +++ b/include/asm-x86/bootparam.h @@ -1,5 +1,5 @@ -#ifndef _ASM_BOOTPARAM_H -#define _ASM_BOOTPARAM_H +#ifndef ASM_X86__BOOTPARAM_H +#define ASM_X86__BOOTPARAM_H #include #include @@ -108,4 +108,4 @@ struct boot_params { __u8 _pad9[276]; /* 0xeec */ } __attribute__((packed)); -#endif /* _ASM_BOOTPARAM_H */ +#endif /* ASM_X86__BOOTPARAM_H */ diff --git a/include/asm-x86/bug.h b/include/asm-x86/bug.h index b69aa64..91ad43a 100644 --- a/include/asm-x86/bug.h +++ b/include/asm-x86/bug.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_BUG_H -#define _ASM_X86_BUG_H +#ifndef ASM_X86__BUG_H +#define ASM_X86__BUG_H #ifdef CONFIG_BUG #define HAVE_ARCH_BUG @@ -36,4 +36,4 @@ do { \ #endif /* !CONFIG_BUG */ #include -#endif +#endif /* ASM_X86__BUG_H */ diff --git a/include/asm-x86/bugs.h b/include/asm-x86/bugs.h index 021cbdd..4761c46 100644 --- a/include/asm-x86/bugs.h +++ b/include/asm-x86/bugs.h @@ -1,7 +1,7 @@ -#ifndef _ASM_X86_BUGS_H -#define _ASM_X86_BUGS_H +#ifndef ASM_X86__BUGS_H +#define ASM_X86__BUGS_H extern void check_bugs(void); int ppro_with_ram_bug(void); -#endif /* _ASM_X86_BUGS_H */ +#endif /* ASM_X86__BUGS_H */ diff --git a/include/asm-x86/byteorder.h b/include/asm-x86/byteorder.h index e02ae2d..722f27d 100644 --- a/include/asm-x86/byteorder.h +++ b/include/asm-x86/byteorder.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_BYTEORDER_H -#define _ASM_X86_BYTEORDER_H +#ifndef ASM_X86__BYTEORDER_H +#define ASM_X86__BYTEORDER_H #include #include @@ -78,4 +78,4 @@ static inline __attribute_const__ __u32 ___arch__swab32(__u32 x) #include -#endif /* _ASM_X86_BYTEORDER_H */ +#endif /* ASM_X86__BYTEORDER_H */ diff --git a/include/asm-x86/cache.h b/include/asm-x86/cache.h index 1e0bac8..ea3f1cc 100644 --- a/include/asm-x86/cache.h +++ b/include/asm-x86/cache.h @@ -1,5 +1,5 @@ -#ifndef _ARCH_X86_CACHE_H -#define _ARCH_X86_CACHE_H +#ifndef ASM_X86__CACHE_H +#define ASM_X86__CACHE_H /* L1 cache line size */ #define L1_CACHE_SHIFT (CONFIG_X86_L1_CACHE_SHIFT) @@ -17,4 +17,4 @@ #endif #endif -#endif +#endif /* ASM_X86__CACHE_H */ diff --git a/include/asm-x86/cacheflush.h b/include/asm-x86/cacheflush.h index f4c0ab5..59859cb 100644 --- a/include/asm-x86/cacheflush.h +++ b/include/asm-x86/cacheflush.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_CACHEFLUSH_H -#define _ASM_X86_CACHEFLUSH_H +#ifndef ASM_X86__CACHEFLUSH_H +#define ASM_X86__CACHEFLUSH_H /* Keep includes the same across arches. */ #include @@ -112,4 +112,4 @@ static inline int rodata_test(void) } #endif -#endif +#endif /* ASM_X86__CACHEFLUSH_H */ diff --git a/include/asm-x86/calgary.h b/include/asm-x86/calgary.h index 67f6040..933fd27 100644 --- a/include/asm-x86/calgary.h +++ b/include/asm-x86/calgary.h @@ -21,8 +21,8 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#ifndef _ASM_X86_64_CALGARY_H -#define _ASM_X86_64_CALGARY_H +#ifndef ASM_X86__CALGARY_H +#define ASM_X86__CALGARY_H #include #include @@ -69,4 +69,4 @@ static inline int calgary_iommu_init(void) { return 1; } static inline void detect_calgary(void) { return; } #endif -#endif /* _ASM_X86_64_CALGARY_H */ +#endif /* ASM_X86__CALGARY_H */ diff --git a/include/asm-x86/checksum_32.h b/include/asm-x86/checksum_32.h index 52bbb0d..d041e8c 100644 --- a/include/asm-x86/checksum_32.h +++ b/include/asm-x86/checksum_32.h @@ -1,5 +1,5 @@ -#ifndef _I386_CHECKSUM_H -#define _I386_CHECKSUM_H +#ifndef ASM_X86__CHECKSUM_32_H +#define ASM_X86__CHECKSUM_32_H #include @@ -186,4 +186,4 @@ static inline __wsum csum_and_copy_to_user(const void *src, return (__force __wsum)-1; /* invalid checksum */ } -#endif +#endif /* ASM_X86__CHECKSUM_32_H */ diff --git a/include/asm-x86/checksum_64.h b/include/asm-x86/checksum_64.h index 8bd861c..110f403 100644 --- a/include/asm-x86/checksum_64.h +++ b/include/asm-x86/checksum_64.h @@ -1,5 +1,5 @@ -#ifndef _X86_64_CHECKSUM_H -#define _X86_64_CHECKSUM_H +#ifndef ASM_X86__CHECKSUM_64_H +#define ASM_X86__CHECKSUM_64_H /* * Checksums for x86-64 @@ -188,4 +188,4 @@ static inline unsigned add32_with_carry(unsigned a, unsigned b) return a; } -#endif +#endif /* ASM_X86__CHECKSUM_64_H */ diff --git a/include/asm-x86/cmpxchg_32.h b/include/asm-x86/cmpxchg_32.h index bf5a69d..0622e45 100644 --- a/include/asm-x86/cmpxchg_32.h +++ b/include/asm-x86/cmpxchg_32.h @@ -1,5 +1,5 @@ -#ifndef __ASM_CMPXCHG_H -#define __ASM_CMPXCHG_H +#ifndef ASM_X86__CMPXCHG_32_H +#define ASM_X86__CMPXCHG_32_H #include /* for LOCK_PREFIX */ @@ -341,4 +341,4 @@ extern unsigned long long cmpxchg_486_u64(volatile void *, u64, u64); #endif -#endif +#endif /* ASM_X86__CMPXCHG_32_H */ diff --git a/include/asm-x86/cmpxchg_64.h b/include/asm-x86/cmpxchg_64.h index 17463cc..63c1a5e 100644 --- a/include/asm-x86/cmpxchg_64.h +++ b/include/asm-x86/cmpxchg_64.h @@ -1,5 +1,5 @@ -#ifndef __ASM_CMPXCHG_H -#define __ASM_CMPXCHG_H +#ifndef ASM_X86__CMPXCHG_64_H +#define ASM_X86__CMPXCHG_64_H #include /* Provides LOCK_PREFIX */ @@ -182,4 +182,4 @@ static inline unsigned long __cmpxchg_local(volatile void *ptr, cmpxchg_local((ptr), (o), (n)); \ }) -#endif +#endif /* ASM_X86__CMPXCHG_64_H */ diff --git a/include/asm-x86/compat.h b/include/asm-x86/compat.h index 1793ac3..6732b15 100644 --- a/include/asm-x86/compat.h +++ b/include/asm-x86/compat.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_64_COMPAT_H -#define _ASM_X86_64_COMPAT_H +#ifndef ASM_X86__COMPAT_H +#define ASM_X86__COMPAT_H /* * Architecture specific compatibility types @@ -215,4 +215,4 @@ static inline int is_compat_task(void) return current_thread_info()->status & TS_COMPAT; } -#endif /* _ASM_X86_64_COMPAT_H */ +#endif /* ASM_X86__COMPAT_H */ diff --git a/include/asm-x86/cpu.h b/include/asm-x86/cpu.h index 73f2ea8..83a1150 100644 --- a/include/asm-x86/cpu.h +++ b/include/asm-x86/cpu.h @@ -1,5 +1,5 @@ -#ifndef _ASM_I386_CPU_H_ -#define _ASM_I386_CPU_H_ +#ifndef ASM_X86__CPU_H +#define ASM_X86__CPU_H #include #include @@ -17,4 +17,4 @@ extern void arch_unregister_cpu(int); #endif DECLARE_PER_CPU(int, cpu_state); -#endif /* _ASM_I386_CPU_H_ */ +#endif /* ASM_X86__CPU_H */ diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h index 2f5a792..2f3143e 100644 --- a/include/asm-x86/cpufeature.h +++ b/include/asm-x86/cpufeature.h @@ -1,8 +1,8 @@ /* * Defines x86 CPU feature bits */ -#ifndef _ASM_X86_CPUFEATURE_H -#define _ASM_X86_CPUFEATURE_H +#ifndef ASM_X86__CPUFEATURE_H +#define ASM_X86__CPUFEATURE_H #include @@ -220,4 +220,4 @@ extern const char * const x86_power_flags[32]; #endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */ -#endif /* _ASM_X86_CPUFEATURE_H */ +#endif /* ASM_X86__CPUFEATURE_H */ diff --git a/include/asm-x86/current.h b/include/asm-x86/current.h index 7515c19..a863ead 100644 --- a/include/asm-x86/current.h +++ b/include/asm-x86/current.h @@ -1,5 +1,5 @@ -#ifndef _X86_CURRENT_H -#define _X86_CURRENT_H +#ifndef ASM_X86__CURRENT_H +#define ASM_X86__CURRENT_H #ifdef CONFIG_X86_32 #include @@ -36,4 +36,4 @@ static __always_inline struct task_struct *get_current(void) #define current get_current() -#endif /* X86_CURRENT_H */ +#endif /* ASM_X86__CURRENT_H */ diff --git a/include/asm-x86/debugreg.h b/include/asm-x86/debugreg.h index c6344d5..ecb6907 100644 --- a/include/asm-x86/debugreg.h +++ b/include/asm-x86/debugreg.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_DEBUGREG_H -#define _ASM_X86_DEBUGREG_H +#ifndef ASM_X86__DEBUGREG_H +#define ASM_X86__DEBUGREG_H /* Indicate the register numbers for a number of the specific @@ -67,4 +67,4 @@ #define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */ #define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */ -#endif +#endif /* ASM_X86__DEBUGREG_H */ diff --git a/include/asm-x86/delay.h b/include/asm-x86/delay.h index 409a649..8a0da95 100644 --- a/include/asm-x86/delay.h +++ b/include/asm-x86/delay.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_DELAY_H -#define _ASM_X86_DELAY_H +#ifndef ASM_X86__DELAY_H +#define ASM_X86__DELAY_H /* * Copyright (C) 1993 Linus Torvalds @@ -28,4 +28,4 @@ extern void __delay(unsigned long loops); void use_tsc_delay(void); -#endif /* _ASM_X86_DELAY_H */ +#endif /* ASM_X86__DELAY_H */ diff --git a/include/asm-x86/desc_defs.h b/include/asm-x86/desc_defs.h index f7bacf3..b881db6 100644 --- a/include/asm-x86/desc_defs.h +++ b/include/asm-x86/desc_defs.h @@ -1,6 +1,6 @@ /* Written 2000 by Andi Kleen */ -#ifndef __ARCH_DESC_DEFS_H -#define __ARCH_DESC_DEFS_H +#ifndef ASM_X86__DESC_DEFS_H +#define ASM_X86__DESC_DEFS_H /* * Segment descriptor structure definitions, usable from both x86_64 and i386 @@ -92,4 +92,4 @@ struct desc_ptr { #endif /* !__ASSEMBLY__ */ -#endif +#endif /* ASM_X86__DESC_DEFS_H */ diff --git a/include/asm-x86/device.h b/include/asm-x86/device.h index 87a7153..dbe8855 100644 --- a/include/asm-x86/device.h +++ b/include/asm-x86/device.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_DEVICE_H -#define _ASM_X86_DEVICE_H +#ifndef ASM_X86__DEVICE_H +#define ASM_X86__DEVICE_H struct dev_archdata { #ifdef CONFIG_ACPI @@ -10,4 +10,4 @@ struct dev_archdata { #endif }; -#endif /* _ASM_X86_DEVICE_H */ +#endif /* ASM_X86__DEVICE_H */ diff --git a/include/asm-x86/div64.h b/include/asm-x86/div64.h index 9a2d644..f9530f2 100644 --- a/include/asm-x86/div64.h +++ b/include/asm-x86/div64.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_DIV64_H -#define _ASM_X86_DIV64_H +#ifndef ASM_X86__DIV64_H +#define ASM_X86__DIV64_H #ifdef CONFIG_X86_32 @@ -57,4 +57,4 @@ static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder) # include #endif /* CONFIG_X86_32 */ -#endif /* _ASM_X86_DIV64_H */ +#endif /* ASM_X86__DIV64_H */ diff --git a/include/asm-x86/dma-mapping.h b/include/asm-x86/dma-mapping.h index c2ddd3d..71b6f7d 100644 --- a/include/asm-x86/dma-mapping.h +++ b/include/asm-x86/dma-mapping.h @@ -1,5 +1,5 @@ -#ifndef _ASM_DMA_MAPPING_H_ -#define _ASM_DMA_MAPPING_H_ +#ifndef ASM_X86__DMA_MAPPING_H +#define ASM_X86__DMA_MAPPING_H /* * IOMMU interface. See Documentation/DMA-mapping.txt and DMA-API.txt for @@ -233,4 +233,4 @@ extern void * dma_mark_declared_memory_occupied(struct device *dev, dma_addr_t device_addr, size_t size); #endif /* CONFIG_X86_32 */ -#endif +#endif /* ASM_X86__DMA_MAPPING_H */ diff --git a/include/asm-x86/dma.h b/include/asm-x86/dma.h index ca1098a..c9f7a4e 100644 --- a/include/asm-x86/dma.h +++ b/include/asm-x86/dma.h @@ -5,8 +5,8 @@ * and John Boyd, Nov. 1992. */ -#ifndef _ASM_X86_DMA_H -#define _ASM_X86_DMA_H +#ifndef ASM_X86__DMA_H +#define ASM_X86__DMA_H #include /* And spinlocks */ #include /* need byte IO */ @@ -315,4 +315,4 @@ extern int isa_dma_bridge_buggy; #define isa_dma_bridge_buggy (0) #endif -#endif /* _ASM_X86_DMA_H */ +#endif /* ASM_X86__DMA_H */ diff --git a/include/asm-x86/dmi.h b/include/asm-x86/dmi.h index 58a8657..1cff6fe 100644 --- a/include/asm-x86/dmi.h +++ b/include/asm-x86/dmi.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_DMI_H -#define _ASM_X86_DMI_H +#ifndef ASM_X86__DMI_H +#define ASM_X86__DMI_H #include @@ -23,4 +23,4 @@ static inline void *dmi_alloc(unsigned len) #define dmi_ioremap early_ioremap #define dmi_iounmap early_iounmap -#endif +#endif /* ASM_X86__DMI_H */ diff --git a/include/asm-x86/ds.h b/include/asm-x86/ds.h index 7881368..6b27c68 100644 --- a/include/asm-x86/ds.h +++ b/include/asm-x86/ds.h @@ -17,8 +17,8 @@ * Markus Metzger , Dec 2007 */ -#ifndef _ASM_X86_DS_H -#define _ASM_X86_DS_H +#ifndef ASM_X86__DS_H +#define ASM_X86__DS_H #include #include @@ -69,4 +69,4 @@ extern int ds_write_bts(void *, const struct bts_struct *); extern unsigned long ds_debugctl_mask(void); extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *c); -#endif /* _ASM_X86_DS_H */ +#endif /* ASM_X86__DS_H */ diff --git a/include/asm-x86/dwarf2.h b/include/asm-x86/dwarf2.h index 738bb9f..21d1bc3 100644 --- a/include/asm-x86/dwarf2.h +++ b/include/asm-x86/dwarf2.h @@ -1,5 +1,5 @@ -#ifndef _DWARF2_H -#define _DWARF2_H +#ifndef ASM_X86__DWARF2_H +#define ASM_X86__DWARF2_H #ifndef __ASSEMBLY__ #warning "asm/dwarf2.h should be only included in pure assembly files" @@ -58,4 +58,4 @@ #endif -#endif +#endif /* ASM_X86__DWARF2_H */ diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h index 16a31e2..38741ac 100644 --- a/include/asm-x86/e820.h +++ b/include/asm-x86/e820.h @@ -1,5 +1,5 @@ -#ifndef __ASM_E820_H -#define __ASM_E820_H +#ifndef ASM_X86__E820_H +#define ASM_X86__E820_H #define E820MAP 0x2d0 /* our map */ #define E820MAX 128 /* number of entries in E820MAP */ @@ -140,4 +140,4 @@ extern char *memory_setup(void); #define HIGH_MEMORY (1024*1024) #endif /* __KERNEL__ */ -#endif /* __ASM_E820_H */ +#endif /* ASM_X86__E820_H */ diff --git a/include/asm-x86/edac.h b/include/asm-x86/edac.h index a8088f6..9493c5b 100644 --- a/include/asm-x86/edac.h +++ b/include/asm-x86/edac.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_EDAC_H -#define _ASM_X86_EDAC_H +#ifndef ASM_X86__EDAC_H +#define ASM_X86__EDAC_H /* ECC atomic, DMA, SMP and interrupt safe scrub function */ @@ -15,4 +15,4 @@ static inline void atomic_scrub(void *va, u32 size) asm volatile("lock; addl $0, %0"::"m" (*virt_addr)); } -#endif +#endif /* ASM_X86__EDAC_H */ diff --git a/include/asm-x86/efi.h b/include/asm-x86/efi.h index 7ed2bd7..69c7b7a 100644 --- a/include/asm-x86/efi.h +++ b/include/asm-x86/efi.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_EFI_H -#define _ASM_X86_EFI_H +#ifndef ASM_X86__EFI_H +#define ASM_X86__EFI_H #ifdef CONFIG_X86_32 @@ -94,4 +94,4 @@ extern void efi_reserve_early(void); extern void efi_call_phys_prelog(void); extern void efi_call_phys_epilog(void); -#endif +#endif /* ASM_X86__EFI_H */ diff --git a/include/asm-x86/elf.h b/include/asm-x86/elf.h index 7be4733..cd678b2 100644 --- a/include/asm-x86/elf.h +++ b/include/asm-x86/elf.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_ELF_H -#define _ASM_X86_ELF_H +#ifndef ASM_X86__ELF_H +#define ASM_X86__ELF_H /* * ELF register definitions.. @@ -332,4 +332,4 @@ extern int syscall32_setup_pages(struct linux_binprm *, int exstack); extern unsigned long arch_randomize_brk(struct mm_struct *mm); #define arch_randomize_brk arch_randomize_brk -#endif +#endif /* ASM_X86__ELF_H */ diff --git a/include/asm-x86/emergency-restart.h b/include/asm-x86/emergency-restart.h index 8e6aef1..190d0d8 100644 --- a/include/asm-x86/emergency-restart.h +++ b/include/asm-x86/emergency-restart.h @@ -1,5 +1,5 @@ -#ifndef _ASM_EMERGENCY_RESTART_H -#define _ASM_EMERGENCY_RESTART_H +#ifndef ASM_X86__EMERGENCY_RESTART_H +#define ASM_X86__EMERGENCY_RESTART_H enum reboot_type { BOOT_TRIPLE = 't', @@ -15,4 +15,4 @@ extern enum reboot_type reboot_type; extern void machine_emergency_restart(void); -#endif /* _ASM_EMERGENCY_RESTART_H */ +#endif /* ASM_X86__EMERGENCY_RESTART_H */ diff --git a/include/asm-x86/fb.h b/include/asm-x86/fb.h index 5301846..aca38dbd 100644 --- a/include/asm-x86/fb.h +++ b/include/asm-x86/fb.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_FB_H -#define _ASM_X86_FB_H +#ifndef ASM_X86__FB_H +#define ASM_X86__FB_H #include #include @@ -18,4 +18,4 @@ extern int fb_is_primary_device(struct fb_info *info); static inline int fb_is_primary_device(struct fb_info *info) { return 0; } #endif -#endif /* _ASM_X86_FB_H */ +#endif /* ASM_X86__FB_H */ diff --git a/include/asm-x86/fixmap.h b/include/asm-x86/fixmap.h index 44d4f82..78e33a1 100644 --- a/include/asm-x86/fixmap.h +++ b/include/asm-x86/fixmap.h @@ -1,5 +1,5 @@ -#ifndef _ASM_FIXMAP_H -#define _ASM_FIXMAP_H +#ifndef ASM_X86__FIXMAP_H +#define ASM_X86__FIXMAP_H #ifdef CONFIG_X86_32 # include "fixmap_32.h" @@ -65,4 +65,4 @@ static inline unsigned long virt_to_fix(const unsigned long vaddr) BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START); return __virt_to_fix(vaddr); } -#endif +#endif /* ASM_X86__FIXMAP_H */ diff --git a/include/asm-x86/fixmap_32.h b/include/asm-x86/fixmap_32.h index f1ac2b2..784e3e7 100644 --- a/include/asm-x86/fixmap_32.h +++ b/include/asm-x86/fixmap_32.h @@ -10,8 +10,8 @@ * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 */ -#ifndef _ASM_FIXMAP_32_H -#define _ASM_FIXMAP_32_H +#ifndef ASM_X86__FIXMAP_32_H +#define ASM_X86__FIXMAP_32_H /* used by vmalloc.c, vsyscall.lds.S. @@ -120,4 +120,4 @@ extern void reserve_top_address(unsigned long reserve); #define FIXADDR_BOOT_START (FIXADDR_TOP - __FIXADDR_BOOT_SIZE) #endif /* !__ASSEMBLY__ */ -#endif +#endif /* ASM_X86__FIXMAP_32_H */ diff --git a/include/asm-x86/fixmap_64.h b/include/asm-x86/fixmap_64.h index 00f3d74..dafb24b 100644 --- a/include/asm-x86/fixmap_64.h +++ b/include/asm-x86/fixmap_64.h @@ -8,8 +8,8 @@ * Copyright (C) 1998 Ingo Molnar */ -#ifndef _ASM_FIXMAP_64_H -#define _ASM_FIXMAP_64_H +#ifndef ASM_X86__FIXMAP_64_H +#define ASM_X86__FIXMAP_64_H #include #include @@ -80,4 +80,4 @@ enum fixed_addresses { #define FIXADDR_USER_START ((unsigned long)VSYSCALL32_VSYSCALL) #define FIXADDR_USER_END (FIXADDR_USER_START + PAGE_SIZE) -#endif +#endif /* ASM_X86__FIXMAP_64_H */ diff --git a/include/asm-x86/floppy.h b/include/asm-x86/floppy.h index dbe82a5..7d83a3a 100644 --- a/include/asm-x86/floppy.h +++ b/include/asm-x86/floppy.h @@ -7,8 +7,8 @@ * * Copyright (C) 1995 */ -#ifndef _ASM_X86_FLOPPY_H -#define _ASM_X86_FLOPPY_H +#ifndef ASM_X86__FLOPPY_H +#define ASM_X86__FLOPPY_H #include @@ -278,4 +278,4 @@ static int FDC2 = -1; #define EXTRA_FLOPPY_PARAMS -#endif /* _ASM_X86_FLOPPY_H */ +#endif /* ASM_X86__FLOPPY_H */ diff --git a/include/asm-x86/ftrace.h b/include/asm-x86/ftrace.h index 5c68b32..be0e004 100644 --- a/include/asm-x86/ftrace.h +++ b/include/asm-x86/ftrace.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_FTRACE -#define _ASM_X86_FTRACE +#ifndef ASM_X86__FTRACE_H +#define ASM_X86__FTRACE_H #ifdef CONFIG_FTRACE #define MCOUNT_ADDR ((long)(mcount)) @@ -11,4 +11,4 @@ extern void mcount(void); #endif /* CONFIG_FTRACE */ -#endif /* _ASM_X86_FTRACE */ +#endif /* ASM_X86__FTRACE_H */ diff --git a/include/asm-x86/futex.h b/include/asm-x86/futex.h index e7a76b3..45dc24d 100644 --- a/include/asm-x86/futex.h +++ b/include/asm-x86/futex.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_FUTEX_H -#define _ASM_X86_FUTEX_H +#ifndef ASM_X86__FUTEX_H +#define ASM_X86__FUTEX_H #ifdef __KERNEL__ @@ -137,4 +137,4 @@ static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, } #endif -#endif +#endif /* ASM_X86__FUTEX_H */ diff --git a/include/asm-x86/gart.h b/include/asm-x86/gart.h index 3f62a83..07f4458 100644 --- a/include/asm-x86/gart.h +++ b/include/asm-x86/gart.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X8664_GART_H -#define _ASM_X8664_GART_H 1 +#ifndef ASM_X86__GART_H +#define ASM_X86__GART_H #include @@ -68,4 +68,4 @@ static inline int aperture_valid(u64 aper_base, u32 aper_size, u32 min_size) return 1; } -#endif +#endif /* ASM_X86__GART_H */ diff --git a/include/asm-x86/genapic_32.h b/include/asm-x86/genapic_32.h index b02ea6e..4904c67 100644 --- a/include/asm-x86/genapic_32.h +++ b/include/asm-x86/genapic_32.h @@ -1,5 +1,5 @@ -#ifndef _ASM_GENAPIC_H -#define _ASM_GENAPIC_H 1 +#ifndef ASM_X86__GENAPIC_32_H +#define ASM_X86__GENAPIC_32_H #include @@ -120,4 +120,4 @@ enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; #define uv_wakeup_secondary(a, b) 1 -#endif +#endif /* ASM_X86__GENAPIC_32_H */ diff --git a/include/asm-x86/genapic_64.h b/include/asm-x86/genapic_64.h index 0f85046..381a09d 100644 --- a/include/asm-x86/genapic_64.h +++ b/include/asm-x86/genapic_64.h @@ -1,5 +1,5 @@ -#ifndef _ASM_GENAPIC_H -#define _ASM_GENAPIC_H 1 +#ifndef ASM_X86__GENAPIC_64_H +#define ASM_X86__GENAPIC_64_H /* * Copyright 2004 James Cleverdon, IBM. @@ -46,4 +46,4 @@ extern int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip); extern void setup_apic_routing(void); -#endif +#endif /* ASM_X86__GENAPIC_64_H */ diff --git a/include/asm-x86/geode.h b/include/asm-x86/geode.h index bb06027..1ef738e 100644 --- a/include/asm-x86/geode.h +++ b/include/asm-x86/geode.h @@ -7,8 +7,8 @@ * as published by the Free Software Foundation. */ -#ifndef _ASM_GEODE_H_ -#define _ASM_GEODE_H_ +#ifndef ASM_X86__GEODE_H +#define ASM_X86__GEODE_H #include #include @@ -249,4 +249,4 @@ extern int __init mfgpt_timer_setup(void); static inline int mfgpt_timer_setup(void) { return 0; } #endif -#endif +#endif /* ASM_X86__GEODE_H */ diff --git a/include/asm-x86/gpio.h b/include/asm-x86/gpio.h index ff87fca..f269ff9 100644 --- a/include/asm-x86/gpio.h +++ b/include/asm-x86/gpio.h @@ -1,6 +1,6 @@ -#ifndef _ASM_I386_GPIO_H -#define _ASM_I386_GPIO_H +#ifndef ASM_X86__GPIO_H +#define ASM_X86__GPIO_H #include -#endif /* _ASM_I386_GPIO_H */ +#endif /* ASM_X86__GPIO_H */ diff --git a/include/asm-x86/hardirq_32.h b/include/asm-x86/hardirq_32.h index 4f85f0f..700fe23 100644 --- a/include/asm-x86/hardirq_32.h +++ b/include/asm-x86/hardirq_32.h @@ -1,5 +1,5 @@ -#ifndef __ASM_HARDIRQ_H -#define __ASM_HARDIRQ_H +#ifndef ASM_X86__HARDIRQ_32_H +#define ASM_X86__HARDIRQ_32_H #include #include @@ -25,4 +25,4 @@ DECLARE_PER_CPU(irq_cpustat_t, irq_stat); void ack_bad_irq(unsigned int irq); #include -#endif /* __ASM_HARDIRQ_H */ +#endif /* ASM_X86__HARDIRQ_32_H */ diff --git a/include/asm-x86/hardirq_64.h b/include/asm-x86/hardirq_64.h index 95d5e09..f8bd291 100644 --- a/include/asm-x86/hardirq_64.h +++ b/include/asm-x86/hardirq_64.h @@ -1,5 +1,5 @@ -#ifndef __ASM_HARDIRQ_H -#define __ASM_HARDIRQ_H +#ifndef ASM_X86__HARDIRQ_64_H +#define ASM_X86__HARDIRQ_64_H #include #include @@ -20,4 +20,4 @@ extern void ack_bad_irq(unsigned int irq); -#endif /* __ASM_HARDIRQ_H */ +#endif /* ASM_X86__HARDIRQ_64_H */ diff --git a/include/asm-x86/highmem.h b/include/asm-x86/highmem.h index 4514b16..bc3f6a2 100644 --- a/include/asm-x86/highmem.h +++ b/include/asm-x86/highmem.h @@ -15,8 +15,8 @@ * Copyright (C) 1999 Ingo Molnar */ -#ifndef _ASM_HIGHMEM_H -#define _ASM_HIGHMEM_H +#ifndef ASM_X86__HIGHMEM_H +#define ASM_X86__HIGHMEM_H #ifdef __KERNEL__ @@ -79,4 +79,4 @@ extern void add_highpages_with_active_regions(int nid, unsigned long start_pfn, #endif /* __KERNEL__ */ -#endif /* _ASM_HIGHMEM_H */ +#endif /* ASM_X86__HIGHMEM_H */ diff --git a/include/asm-x86/hpet.h b/include/asm-x86/hpet.h index 82f1ac6..cbbbb6d 100644 --- a/include/asm-x86/hpet.h +++ b/include/asm-x86/hpet.h @@ -1,5 +1,5 @@ -#ifndef ASM_X86_HPET_H -#define ASM_X86_HPET_H +#ifndef ASM_X86__HPET_H +#define ASM_X86__HPET_H #ifdef CONFIG_HPET_TIMER @@ -90,4 +90,4 @@ static inline int is_hpet_enabled(void) { return 0; } #define hpet_readl(a) 0 #endif -#endif /* ASM_X86_HPET_H */ +#endif /* ASM_X86__HPET_H */ diff --git a/include/asm-x86/hugetlb.h b/include/asm-x86/hugetlb.h index 14171a4..0e0a082 100644 --- a/include/asm-x86/hugetlb.h +++ b/include/asm-x86/hugetlb.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_HUGETLB_H -#define _ASM_X86_HUGETLB_H +#ifndef ASM_X86__HUGETLB_H +#define ASM_X86__HUGETLB_H #include @@ -88,4 +88,4 @@ static inline void arch_release_hugepage(struct page *page) { } -#endif /* _ASM_X86_HUGETLB_H */ +#endif /* ASM_X86__HUGETLB_H */ diff --git a/include/asm-x86/hw_irq.h b/include/asm-x86/hw_irq.h index 77ba51d..3f3c465 100644 --- a/include/asm-x86/hw_irq.h +++ b/include/asm-x86/hw_irq.h @@ -1,5 +1,5 @@ -#ifndef _ASM_HW_IRQ_H -#define _ASM_HW_IRQ_H +#ifndef ASM_X86__HW_IRQ_H +#define ASM_X86__HW_IRQ_H /* * (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar @@ -104,4 +104,4 @@ extern void setup_vector_irq(int cpu); #endif /* !ASSEMBLY_ */ -#endif +#endif /* ASM_X86__HW_IRQ_H */ diff --git a/include/asm-x86/hypertransport.h b/include/asm-x86/hypertransport.h index d2bbd23..cc011a3 100644 --- a/include/asm-x86/hypertransport.h +++ b/include/asm-x86/hypertransport.h @@ -1,5 +1,5 @@ -#ifndef ASM_HYPERTRANSPORT_H -#define ASM_HYPERTRANSPORT_H +#ifndef ASM_X86__HYPERTRANSPORT_H +#define ASM_X86__HYPERTRANSPORT_H /* * Constants for x86 Hypertransport Interrupts. @@ -42,4 +42,4 @@ #define HT_IRQ_HIGH_DEST_ID(v) \ ((((v) >> 8) << HT_IRQ_HIGH_DEST_ID_SHIFT) & HT_IRQ_HIGH_DEST_ID_MASK) -#endif /* ASM_HYPERTRANSPORT_H */ +#endif /* ASM_X86__HYPERTRANSPORT_H */ diff --git a/include/asm-x86/i387.h b/include/asm-x86/i387.h index 37672f7..3e83cbb 100644 --- a/include/asm-x86/i387.h +++ b/include/asm-x86/i387.h @@ -7,8 +7,8 @@ * x86-64 work by Andi Kleen 2002 */ -#ifndef _ASM_X86_I387_H -#define _ASM_X86_I387_H +#ifndef ASM_X86__I387_H +#define ASM_X86__I387_H #include #include @@ -360,4 +360,4 @@ static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk) } } -#endif /* _ASM_X86_I387_H */ +#endif /* ASM_X86__I387_H */ diff --git a/include/asm-x86/i8253.h b/include/asm-x86/i8253.h index b51c048..15a5b53 100644 --- a/include/asm-x86/i8253.h +++ b/include/asm-x86/i8253.h @@ -1,5 +1,5 @@ -#ifndef __ASM_I8253_H__ -#define __ASM_I8253_H__ +#ifndef ASM_X86__I8253_H +#define ASM_X86__I8253_H /* i8253A PIT registers */ #define PIT_MODE 0x43 @@ -15,4 +15,4 @@ extern void setup_pit_timer(void); #define inb_pit inb_p #define outb_pit outb_p -#endif /* __ASM_I8253_H__ */ +#endif /* ASM_X86__I8253_H */ diff --git a/include/asm-x86/i8259.h b/include/asm-x86/i8259.h index 2f98df9..c586559 100644 --- a/include/asm-x86/i8259.h +++ b/include/asm-x86/i8259.h @@ -1,5 +1,5 @@ -#ifndef __ASM_I8259_H__ -#define __ASM_I8259_H__ +#ifndef ASM_X86__I8259_H +#define ASM_X86__I8259_H #include @@ -57,4 +57,4 @@ static inline void outb_pic(unsigned char value, unsigned int port) extern struct irq_chip i8259A_chip; -#endif /* __ASM_I8259_H__ */ +#endif /* ASM_X86__I8259_H */ diff --git a/include/asm-x86/ia32.h b/include/asm-x86/ia32.h index 55d3abe..f932f7a 100644 --- a/include/asm-x86/ia32.h +++ b/include/asm-x86/ia32.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_64_IA32_H -#define _ASM_X86_64_IA32_H +#ifndef ASM_X86__IA32_H +#define ASM_X86__IA32_H #ifdef CONFIG_IA32_EMULATION @@ -167,4 +167,4 @@ extern void ia32_pick_mmap_layout(struct mm_struct *mm); #endif /* !CONFIG_IA32_SUPPORT */ -#endif +#endif /* ASM_X86__IA32_H */ diff --git a/include/asm-x86/ia32_unistd.h b/include/asm-x86/ia32_unistd.h index 61cea9e..dbd887d 100644 --- a/include/asm-x86/ia32_unistd.h +++ b/include/asm-x86/ia32_unistd.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_64_IA32_UNISTD_H_ -#define _ASM_X86_64_IA32_UNISTD_H_ +#ifndef ASM_X86__IA32_UNISTD_H +#define ASM_X86__IA32_UNISTD_H /* * This file contains the system call numbers of the ia32 port, @@ -15,4 +15,4 @@ #define __NR_ia32_sigreturn 119 #define __NR_ia32_rt_sigreturn 173 -#endif /* _ASM_X86_64_IA32_UNISTD_H_ */ +#endif /* ASM_X86__IA32_UNISTD_H */ diff --git a/include/asm-x86/ide.h b/include/asm-x86/ide.h index cf9c98e..8bf3cdc 100644 --- a/include/asm-x86/ide.h +++ b/include/asm-x86/ide.h @@ -6,8 +6,8 @@ * This file contains the i386 architecture specific IDE code. */ -#ifndef __ASMi386_IDE_H -#define __ASMi386_IDE_H +#ifndef ASM_X86__IDE_H +#define ASM_X86__IDE_H #ifdef __KERNEL__ @@ -62,4 +62,4 @@ static __inline__ unsigned long ide_default_io_base(int index) #endif /* __KERNEL__ */ -#endif /* __ASMi386_IDE_H */ +#endif /* ASM_X86__IDE_H */ diff --git a/include/asm-x86/idle.h b/include/asm-x86/idle.h index d240e5b..dc9c794 100644 --- a/include/asm-x86/idle.h +++ b/include/asm-x86/idle.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_64_IDLE_H -#define _ASM_X86_64_IDLE_H 1 +#ifndef ASM_X86__IDLE_H +#define ASM_X86__IDLE_H #define IDLE_START 1 #define IDLE_END 2 @@ -10,4 +10,4 @@ void idle_notifier_register(struct notifier_block *n); void enter_idle(void); void exit_idle(void); -#endif +#endif /* ASM_X86__IDLE_H */ diff --git a/include/asm-x86/intel_arch_perfmon.h b/include/asm-x86/intel_arch_perfmon.h index fa0fd06..07c03c6 100644 --- a/include/asm-x86/intel_arch_perfmon.h +++ b/include/asm-x86/intel_arch_perfmon.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_INTEL_ARCH_PERFMON_H -#define _ASM_X86_INTEL_ARCH_PERFMON_H +#ifndef ASM_X86__INTEL_ARCH_PERFMON_H +#define ASM_X86__INTEL_ARCH_PERFMON_H #define MSR_ARCH_PERFMON_PERFCTR0 0xc1 #define MSR_ARCH_PERFMON_PERFCTR1 0xc2 @@ -28,4 +28,4 @@ union cpuid10_eax { unsigned int full; }; -#endif /* _ASM_X86_INTEL_ARCH_PERFMON_H */ +#endif /* ASM_X86__INTEL_ARCH_PERFMON_H */ diff --git a/include/asm-x86/io.h b/include/asm-x86/io.h index bf5d629..1b75a43 100644 --- a/include/asm-x86/io.h +++ b/include/asm-x86/io.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_IO_H -#define _ASM_X86_IO_H +#ifndef ASM_X86__IO_H +#define ASM_X86__IO_H #define ARCH_HAS_IOREMAP_WC @@ -99,4 +99,4 @@ extern void early_iounmap(void *addr, unsigned long size); extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys); -#endif /* _ASM_X86_IO_H */ +#endif /* ASM_X86__IO_H */ diff --git a/include/asm-x86/io_32.h b/include/asm-x86/io_32.h index 4df44ed..d9d1e3f 100644 --- a/include/asm-x86/io_32.h +++ b/include/asm-x86/io_32.h @@ -1,5 +1,5 @@ -#ifndef _ASM_IO_H -#define _ASM_IO_H +#ifndef ASM_X86__IO_32_H +#define ASM_X86__IO_32_H #include #include @@ -279,4 +279,4 @@ BUILDIO(b, b, char) BUILDIO(w, w, short) BUILDIO(l, , int) -#endif +#endif /* ASM_X86__IO_32_H */ diff --git a/include/asm-x86/io_64.h b/include/asm-x86/io_64.h index ddd8058..00ac472 100644 --- a/include/asm-x86/io_64.h +++ b/include/asm-x86/io_64.h @@ -1,5 +1,5 @@ -#ifndef _ASM_IO_H -#define _ASM_IO_H +#ifndef ASM_X86__IO_64_H +#define ASM_X86__IO_64_H /* @@ -243,4 +243,4 @@ extern int iommu_bio_merge; #endif /* __KERNEL__ */ -#endif +#endif /* ASM_X86__IO_64_H */ diff --git a/include/asm-x86/io_apic.h b/include/asm-x86/io_apic.h index 14f82bb..be62847 100644 --- a/include/asm-x86/io_apic.h +++ b/include/asm-x86/io_apic.h @@ -1,5 +1,5 @@ -#ifndef __ASM_IO_APIC_H -#define __ASM_IO_APIC_H +#ifndef ASM_X86__IO_APIC_H +#define ASM_X86__IO_APIC_H #include #include @@ -189,4 +189,4 @@ static const int timer_through_8259 = 0; static inline void ioapic_init_mappings(void) { } #endif -#endif +#endif /* ASM_X86__IO_APIC_H */ diff --git a/include/asm-x86/ioctls.h b/include/asm-x86/ioctls.h index c0c338b..3366035 100644 --- a/include/asm-x86/ioctls.h +++ b/include/asm-x86/ioctls.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_IOCTLS_H -#define _ASM_X86_IOCTLS_H +#ifndef ASM_X86__IOCTLS_H +#define ASM_X86__IOCTLS_H #include @@ -85,4 +85,4 @@ #define TIOCSER_TEMT 0x01 /* Transmitter physically empty */ -#endif +#endif /* ASM_X86__IOCTLS_H */ diff --git a/include/asm-x86/iommu.h b/include/asm-x86/iommu.h index d63166f..4f3d212 100644 --- a/include/asm-x86/iommu.h +++ b/include/asm-x86/iommu.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X8664_IOMMU_H -#define _ASM_X8664_IOMMU_H 1 +#ifndef ASM_X86__IOMMU_H +#define ASM_X86__IOMMU_H extern void pci_iommu_shutdown(void); extern void no_iommu_init(void); @@ -39,4 +39,4 @@ static inline void gart_iommu_hole_init(void) } #endif -#endif +#endif /* ASM_X86__IOMMU_H */ diff --git a/include/asm-x86/ipcbuf.h b/include/asm-x86/ipcbuf.h index ee678fd..910304f 100644 --- a/include/asm-x86/ipcbuf.h +++ b/include/asm-x86/ipcbuf.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_IPCBUF_H -#define _ASM_X86_IPCBUF_H +#ifndef ASM_X86__IPCBUF_H +#define ASM_X86__IPCBUF_H /* * The ipc64_perm structure for x86 architecture. @@ -25,4 +25,4 @@ struct ipc64_perm { unsigned long __unused2; }; -#endif /* _ASM_X86_IPCBUF_H */ +#endif /* ASM_X86__IPCBUF_H */ diff --git a/include/asm-x86/ipi.h b/include/asm-x86/ipi.h index 196d63c..63390ea 100644 --- a/include/asm-x86/ipi.h +++ b/include/asm-x86/ipi.h @@ -1,5 +1,5 @@ -#ifndef __ASM_IPI_H -#define __ASM_IPI_H +#ifndef ASM_X86__IPI_H +#define ASM_X86__IPI_H /* * Copyright 2004 James Cleverdon, IBM. @@ -129,4 +129,4 @@ static inline void send_IPI_mask_sequence(cpumask_t mask, int vector) local_irq_restore(flags); } -#endif /* __ASM_IPI_H */ +#endif /* ASM_X86__IPI_H */ diff --git a/include/asm-x86/irq.h b/include/asm-x86/irq.h index 1a29257..1e5f290 100644 --- a/include/asm-x86/irq.h +++ b/include/asm-x86/irq.h @@ -1,5 +1,5 @@ -#ifndef _ASM_IRQ_H -#define _ASM_IRQ_H +#ifndef ASM_X86__IRQ_H +#define ASM_X86__IRQ_H /* * (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar * @@ -47,4 +47,4 @@ extern void native_init_IRQ(void); /* Interrupt vector management */ extern DECLARE_BITMAP(used_vectors, NR_VECTORS); -#endif /* _ASM_IRQ_H */ +#endif /* ASM_X86__IRQ_H */ diff --git a/include/asm-x86/irq_regs_32.h b/include/asm-x86/irq_regs_32.h index 3368b20..316a3b2 100644 --- a/include/asm-x86/irq_regs_32.h +++ b/include/asm-x86/irq_regs_32.h @@ -4,8 +4,8 @@ * * Jeremy Fitzhardinge */ -#ifndef _ASM_I386_IRQ_REGS_H -#define _ASM_I386_IRQ_REGS_H +#ifndef ASM_X86__IRQ_REGS_32_H +#define ASM_X86__IRQ_REGS_32_H #include @@ -26,4 +26,4 @@ static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs) return old_regs; } -#endif /* _ASM_I386_IRQ_REGS_H */ +#endif /* ASM_X86__IRQ_REGS_32_H */ diff --git a/include/asm-x86/irq_vectors.h b/include/asm-x86/irq_vectors.h index 90b1d1f..646d59f 100644 --- a/include/asm-x86/irq_vectors.h +++ b/include/asm-x86/irq_vectors.h @@ -1,5 +1,5 @@ -#ifndef _ASM_IRQ_VECTORS_H -#define _ASM_IRQ_VECTORS_H +#ifndef ASM_X86__IRQ_VECTORS_H +#define ASM_X86__IRQ_VECTORS_H #include @@ -170,4 +170,4 @@ #define VIC_CPU_BOOT_ERRATA_CPI (VIC_CPI_LEVEL0 + 8) -#endif /* _ASM_IRQ_VECTORS_H */ +#endif /* ASM_X86__IRQ_VECTORS_H */ diff --git a/include/asm-x86/ist.h b/include/asm-x86/ist.h index 6ec6cee..35a2fe9 100644 --- a/include/asm-x86/ist.h +++ b/include/asm-x86/ist.h @@ -1,5 +1,5 @@ -#ifndef _ASM_IST_H -#define _ASM_IST_H +#ifndef ASM_X86__IST_H +#define ASM_X86__IST_H /* * Include file for the interface to IST BIOS @@ -31,4 +31,4 @@ struct ist_info { extern struct ist_info ist_info; #endif /* __KERNEL__ */ -#endif /* _ASM_IST_H */ +#endif /* ASM_X86__IST_H */ diff --git a/include/asm-x86/k8.h b/include/asm-x86/k8.h index 452e2b6..2bbaf43 100644 --- a/include/asm-x86/k8.h +++ b/include/asm-x86/k8.h @@ -1,5 +1,5 @@ -#ifndef _ASM_K8_H -#define _ASM_K8_H 1 +#ifndef ASM_X86__K8_H +#define ASM_X86__K8_H #include @@ -12,4 +12,4 @@ extern int cache_k8_northbridges(void); extern void k8_flush_garts(void); extern int k8_scan_nodes(unsigned long start, unsigned long end); -#endif +#endif /* ASM_X86__K8_H */ diff --git a/include/asm-x86/kdebug.h b/include/asm-x86/kdebug.h index 96651bb..5ec3ad3 100644 --- a/include/asm-x86/kdebug.h +++ b/include/asm-x86/kdebug.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_KDEBUG_H -#define _ASM_X86_KDEBUG_H +#ifndef ASM_X86__KDEBUG_H +#define ASM_X86__KDEBUG_H #include @@ -35,4 +35,4 @@ extern void show_regs(struct pt_regs *regs); extern unsigned long oops_begin(void); extern void oops_end(unsigned long, struct pt_regs *, int signr); -#endif +#endif /* ASM_X86__KDEBUG_H */ diff --git a/include/asm-x86/kexec.h b/include/asm-x86/kexec.h index 8f855a1..262b63e 100644 --- a/include/asm-x86/kexec.h +++ b/include/asm-x86/kexec.h @@ -1,5 +1,5 @@ -#ifndef _KEXEC_H -#define _KEXEC_H +#ifndef ASM_X86__KEXEC_H +#define ASM_X86__KEXEC_H #ifdef CONFIG_X86_32 # define PA_CONTROL_PAGE 0 @@ -166,4 +166,4 @@ relocate_kernel(unsigned long indirection_page, #endif /* __ASSEMBLY__ */ -#endif /* _KEXEC_H */ +#endif /* ASM_X86__KEXEC_H */ diff --git a/include/asm-x86/kgdb.h b/include/asm-x86/kgdb.h index 484c475..83a7ee2 100644 --- a/include/asm-x86/kgdb.h +++ b/include/asm-x86/kgdb.h @@ -1,5 +1,5 @@ -#ifndef _ASM_KGDB_H_ -#define _ASM_KGDB_H_ +#ifndef ASM_X86__KGDB_H +#define ASM_X86__KGDB_H /* * Copyright (C) 2001-2004 Amit S. Kale @@ -78,4 +78,4 @@ static inline void arch_kgdb_breakpoint(void) #define BREAK_INSTR_SIZE 1 #define CACHE_FLUSH_IS_SAFE 1 -#endif /* _ASM_KGDB_H_ */ +#endif /* ASM_X86__KGDB_H */ diff --git a/include/asm-x86/kmap_types.h b/include/asm-x86/kmap_types.h index 5f41741..89f4449 100644 --- a/include/asm-x86/kmap_types.h +++ b/include/asm-x86/kmap_types.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_KMAP_TYPES_H -#define _ASM_X86_KMAP_TYPES_H +#ifndef ASM_X86__KMAP_TYPES_H +#define ASM_X86__KMAP_TYPES_H #if defined(CONFIG_X86_32) && defined(CONFIG_DEBUG_HIGHMEM) # define D(n) __KM_FENCE_##n , @@ -26,4 +26,4 @@ D(13) KM_TYPE_NR #undef D -#endif +#endif /* ASM_X86__KMAP_TYPES_H */ diff --git a/include/asm-x86/kprobes.h b/include/asm-x86/kprobes.h index 54980b0..bd840786 100644 --- a/include/asm-x86/kprobes.h +++ b/include/asm-x86/kprobes.h @@ -1,5 +1,5 @@ -#ifndef _ASM_KPROBES_H -#define _ASM_KPROBES_H +#ifndef ASM_X86__KPROBES_H +#define ASM_X86__KPROBES_H /* * Kernel Probes (KProbes) * @@ -94,4 +94,4 @@ static inline void restore_interrupts(struct pt_regs *regs) extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr); extern int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data); -#endif /* _ASM_KPROBES_H */ +#endif /* ASM_X86__KPROBES_H */ diff --git a/include/asm-x86/kvm.h b/include/asm-x86/kvm.h index 6f18408..78e954d 100644 --- a/include/asm-x86/kvm.h +++ b/include/asm-x86/kvm.h @@ -1,5 +1,5 @@ -#ifndef __LINUX_KVM_X86_H -#define __LINUX_KVM_X86_H +#ifndef ASM_X86__KVM_H +#define ASM_X86__KVM_H /* * KVM x86 specific structures and definitions @@ -230,4 +230,4 @@ struct kvm_pit_state { #define KVM_TRC_APIC_ACCESS (KVM_TRC_HANDLER + 0x14) #define KVM_TRC_TDP_FAULT (KVM_TRC_HANDLER + 0x15) -#endif +#endif /* ASM_X86__KVM_H */ diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h index 924e1bf..920823d 100644 --- a/include/asm-x86/kvm_host.h +++ b/include/asm-x86/kvm_host.h @@ -8,8 +8,8 @@ * */ -#ifndef ASM_KVM_HOST_H -#define ASM_KVM_HOST_H +#ifndef ASM_X86__KVM_HOST_H +#define ASM_X86__KVM_HOST_H #include #include @@ -728,4 +728,4 @@ asmlinkage void kvm_handle_fault_on_reboot(void); KVM_EX_ENTRY " 666b, 667b \n\t" \ ".popsection" -#endif +#endif /* ASM_X86__KVM_HOST_H */ diff --git a/include/asm-x86/kvm_para.h b/include/asm-x86/kvm_para.h index 76f3921..30054fd 100644 --- a/include/asm-x86/kvm_para.h +++ b/include/asm-x86/kvm_para.h @@ -1,5 +1,5 @@ -#ifndef __X86_KVM_PARA_H -#define __X86_KVM_PARA_H +#ifndef ASM_X86__KVM_PARA_H +#define ASM_X86__KVM_PARA_H /* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx. It * should be used to determine that a VM is running under KVM. @@ -144,4 +144,4 @@ static inline unsigned int kvm_arch_para_features(void) #endif -#endif +#endif /* ASM_X86__KVM_PARA_H */ diff --git a/include/asm-x86/kvm_x86_emulate.h b/include/asm-x86/kvm_x86_emulate.h index 4e8c1e4..e2d9b03 100644 --- a/include/asm-x86/kvm_x86_emulate.h +++ b/include/asm-x86/kvm_x86_emulate.h @@ -8,8 +8,8 @@ * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4 */ -#ifndef __X86_EMULATE_H__ -#define __X86_EMULATE_H__ +#ifndef ASM_X86__KVM_X86_EMULATE_H +#define ASM_X86__KVM_X86_EMULATE_H struct x86_emulate_ctxt; @@ -181,4 +181,4 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, int x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops); -#endif /* __X86_EMULATE_H__ */ +#endif /* ASM_X86__KVM_X86_EMULATE_H */ diff --git a/include/asm-x86/ldt.h b/include/asm-x86/ldt.h index 20c5972..a522850 100644 --- a/include/asm-x86/ldt.h +++ b/include/asm-x86/ldt.h @@ -3,8 +3,8 @@ * * Definitions of structures used with the modify_ldt system call. */ -#ifndef _ASM_X86_LDT_H -#define _ASM_X86_LDT_H +#ifndef ASM_X86__LDT_H +#define ASM_X86__LDT_H /* Maximum number of LDT entries supported. */ #define LDT_ENTRIES 8192 @@ -37,4 +37,4 @@ struct user_desc { #define MODIFY_LDT_CONTENTS_CODE 2 #endif /* !__ASSEMBLY__ */ -#endif +#endif /* ASM_X86__LDT_H */ diff --git a/include/asm-x86/lguest.h b/include/asm-x86/lguest.h index be4a724..7505e94 100644 --- a/include/asm-x86/lguest.h +++ b/include/asm-x86/lguest.h @@ -1,5 +1,5 @@ -#ifndef _X86_LGUEST_H -#define _X86_LGUEST_H +#ifndef ASM_X86__LGUEST_H +#define ASM_X86__LGUEST_H #define GDT_ENTRY_LGUEST_CS 10 #define GDT_ENTRY_LGUEST_DS 11 @@ -91,4 +91,4 @@ static inline void lguest_set_ts(void) #endif /* __ASSEMBLY__ */ -#endif +#endif /* ASM_X86__LGUEST_H */ diff --git a/include/asm-x86/lguest_hcall.h b/include/asm-x86/lguest_hcall.h index a3241f2..8f034ba 100644 --- a/include/asm-x86/lguest_hcall.h +++ b/include/asm-x86/lguest_hcall.h @@ -1,6 +1,6 @@ /* Architecture specific portion of the lguest hypercalls */ -#ifndef _X86_LGUEST_HCALL_H -#define _X86_LGUEST_HCALL_H +#ifndef ASM_X86__LGUEST_HCALL_H +#define ASM_X86__LGUEST_HCALL_H #define LHCALL_FLUSH_ASYNC 0 #define LHCALL_LGUEST_INIT 1 @@ -68,4 +68,4 @@ struct hcall_args { }; #endif /* !__ASSEMBLY__ */ -#endif /* _I386_LGUEST_HCALL_H */ +#endif /* ASM_X86__LGUEST_HCALL_H */ diff --git a/include/asm-x86/linkage.h b/include/asm-x86/linkage.h index 64e444f..42d8b62 100644 --- a/include/asm-x86/linkage.h +++ b/include/asm-x86/linkage.h @@ -1,5 +1,5 @@ -#ifndef __ASM_LINKAGE_H -#define __ASM_LINKAGE_H +#ifndef ASM_X86__LINKAGE_H +#define ASM_X86__LINKAGE_H #undef notrace #define notrace __attribute__((no_instrument_function)) @@ -57,5 +57,5 @@ #define __ALIGN_STR ".align 16,0x90" #endif -#endif +#endif /* ASM_X86__LINKAGE_H */ diff --git a/include/asm-x86/local.h b/include/asm-x86/local.h index 330a724..ae91994 100644 --- a/include/asm-x86/local.h +++ b/include/asm-x86/local.h @@ -1,5 +1,5 @@ -#ifndef _ARCH_LOCAL_H -#define _ARCH_LOCAL_H +#ifndef ASM_X86__LOCAL_H +#define ASM_X86__LOCAL_H #include @@ -232,4 +232,4 @@ static inline long local_sub_return(long i, local_t *l) #define __cpu_local_add(i, l) cpu_local_add((i), (l)) #define __cpu_local_sub(i, l) cpu_local_sub((i), (l)) -#endif /* _ARCH_LOCAL_H */ +#endif /* ASM_X86__LOCAL_H */ diff --git a/include/asm-x86/mach-bigsmp/mach_apic.h b/include/asm-x86/mach-bigsmp/mach_apic.h index c3b9dc6..05362d4 100644 --- a/include/asm-x86/mach-bigsmp/mach_apic.h +++ b/include/asm-x86/mach-bigsmp/mach_apic.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MACH_APIC_H -#define __ASM_MACH_APIC_H +#ifndef ASM_X86__MACH_BIGSMP__MACH_APIC_H +#define ASM_X86__MACH_BIGSMP__MACH_APIC_H #define xapic_phys_to_log_apicid(cpu) (per_cpu(x86_bios_cpu_apicid, cpu)) #define esr_disable (1) @@ -141,4 +141,4 @@ static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) return cpuid_apic >> index_msb; } -#endif /* __ASM_MACH_APIC_H */ +#endif /* ASM_X86__MACH_BIGSMP__MACH_APIC_H */ diff --git a/include/asm-x86/mach-bigsmp/mach_apicdef.h b/include/asm-x86/mach-bigsmp/mach_apicdef.h index a58ab5a..811935d 100644 --- a/include/asm-x86/mach-bigsmp/mach_apicdef.h +++ b/include/asm-x86/mach-bigsmp/mach_apicdef.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MACH_APICDEF_H -#define __ASM_MACH_APICDEF_H +#ifndef ASM_X86__MACH_BIGSMP__MACH_APICDEF_H +#define ASM_X86__MACH_BIGSMP__MACH_APICDEF_H #define APIC_ID_MASK (0xFF<<24) @@ -10,4 +10,4 @@ static inline unsigned get_apic_id(unsigned long x) #define GET_APIC_ID(x) get_apic_id(x) -#endif +#endif /* ASM_X86__MACH_BIGSMP__MACH_APICDEF_H */ diff --git a/include/asm-x86/mach-bigsmp/mach_ipi.h b/include/asm-x86/mach-bigsmp/mach_ipi.h index 9404c53..b1b0f96 100644 --- a/include/asm-x86/mach-bigsmp/mach_ipi.h +++ b/include/asm-x86/mach-bigsmp/mach_ipi.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MACH_IPI_H -#define __ASM_MACH_IPI_H +#ifndef ASM_X86__MACH_BIGSMP__MACH_IPI_H +#define ASM_X86__MACH_BIGSMP__MACH_IPI_H void send_IPI_mask_sequence(cpumask_t mask, int vector); @@ -22,4 +22,4 @@ static inline void send_IPI_all(int vector) send_IPI_mask(cpu_online_map, vector); } -#endif /* __ASM_MACH_IPI_H */ +#endif /* ASM_X86__MACH_BIGSMP__MACH_IPI_H */ diff --git a/include/asm-x86/mach-default/apm.h b/include/asm-x86/mach-default/apm.h index 989f34c..2aa61b5 100644 --- a/include/asm-x86/mach-default/apm.h +++ b/include/asm-x86/mach-default/apm.h @@ -3,8 +3,8 @@ * Split out from apm.c by Osamu Tomita */ -#ifndef _ASM_APM_H -#define _ASM_APM_H +#ifndef ASM_X86__MACH_DEFAULT__APM_H +#define ASM_X86__MACH_DEFAULT__APM_H #ifdef APM_ZERO_SEGS # define APM_DO_ZERO_SEGS \ @@ -70,4 +70,4 @@ static inline u8 apm_bios_call_simple_asm(u32 func, u32 ebx_in, return error; } -#endif /* _ASM_APM_H */ +#endif /* ASM_X86__MACH_DEFAULT__APM_H */ diff --git a/include/asm-x86/mach-default/mach_apic.h b/include/asm-x86/mach-default/mach_apic.h index f3226b9..b615f40 100644 --- a/include/asm-x86/mach-default/mach_apic.h +++ b/include/asm-x86/mach-default/mach_apic.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MACH_APIC_H -#define __ASM_MACH_APIC_H +#ifndef ASM_X86__MACH_DEFAULT__MACH_APIC_H +#define ASM_X86__MACH_DEFAULT__MACH_APIC_H #ifdef CONFIG_X86_LOCAL_APIC @@ -138,4 +138,4 @@ static inline void enable_apic_mode(void) } #endif /* CONFIG_X86_LOCAL_APIC */ -#endif /* __ASM_MACH_APIC_H */ +#endif /* ASM_X86__MACH_DEFAULT__MACH_APIC_H */ diff --git a/include/asm-x86/mach-default/mach_apicdef.h b/include/asm-x86/mach-default/mach_apicdef.h index e4b29ba..936704f 100644 --- a/include/asm-x86/mach-default/mach_apicdef.h +++ b/include/asm-x86/mach-default/mach_apicdef.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MACH_APICDEF_H -#define __ASM_MACH_APICDEF_H +#ifndef ASM_X86__MACH_DEFAULT__MACH_APICDEF_H +#define ASM_X86__MACH_DEFAULT__MACH_APICDEF_H #include @@ -21,4 +21,4 @@ static inline unsigned get_apic_id(unsigned long x) #define GET_APIC_ID(x) get_apic_id(x) #endif -#endif +#endif /* ASM_X86__MACH_DEFAULT__MACH_APICDEF_H */ diff --git a/include/asm-x86/mach-default/mach_ipi.h b/include/asm-x86/mach-default/mach_ipi.h index be32336..674bc7e 100644 --- a/include/asm-x86/mach-default/mach_ipi.h +++ b/include/asm-x86/mach-default/mach_ipi.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MACH_IPI_H -#define __ASM_MACH_IPI_H +#ifndef ASM_X86__MACH_DEFAULT__MACH_IPI_H +#define ASM_X86__MACH_DEFAULT__MACH_IPI_H /* Avoid include hell */ #define NMI_VECTOR 0x02 @@ -61,4 +61,4 @@ static inline void send_IPI_all(int vector) } #endif -#endif /* __ASM_MACH_IPI_H */ +#endif /* ASM_X86__MACH_DEFAULT__MACH_IPI_H */ diff --git a/include/asm-x86/mach-default/mach_mpparse.h b/include/asm-x86/mach-default/mach_mpparse.h index d141085..9c381f2 100644 --- a/include/asm-x86/mach-default/mach_mpparse.h +++ b/include/asm-x86/mach-default/mach_mpparse.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MACH_MPPARSE_H -#define __ASM_MACH_MPPARSE_H +#ifndef ASM_X86__MACH_DEFAULT__MACH_MPPARSE_H +#define ASM_X86__MACH_DEFAULT__MACH_MPPARSE_H static inline int mps_oem_check(struct mp_config_table *mpc, char *oem, char *productid) @@ -14,4 +14,4 @@ static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) } -#endif /* __ASM_MACH_MPPARSE_H */ +#endif /* ASM_X86__MACH_DEFAULT__MACH_MPPARSE_H */ diff --git a/include/asm-x86/mach-default/mach_mpspec.h b/include/asm-x86/mach-default/mach_mpspec.h index 51c9a97..d77646f 100644 --- a/include/asm-x86/mach-default/mach_mpspec.h +++ b/include/asm-x86/mach-default/mach_mpspec.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MACH_MPSPEC_H -#define __ASM_MACH_MPSPEC_H +#ifndef ASM_X86__MACH_DEFAULT__MACH_MPSPEC_H +#define ASM_X86__MACH_DEFAULT__MACH_MPSPEC_H #define MAX_IRQ_SOURCES 256 @@ -9,4 +9,4 @@ #define MAX_MP_BUSSES 32 #endif -#endif /* __ASM_MACH_MPSPEC_H */ +#endif /* ASM_X86__MACH_DEFAULT__MACH_MPSPEC_H */ diff --git a/include/asm-x86/mach-default/mach_timer.h b/include/asm-x86/mach-default/mach_timer.h index 4b76e53..990b158 100644 --- a/include/asm-x86/mach-default/mach_timer.h +++ b/include/asm-x86/mach-default/mach_timer.h @@ -10,8 +10,8 @@ * directly because of the awkward 8-bit access mechanism of the 82C54 * device. */ -#ifndef _MACH_TIMER_H -#define _MACH_TIMER_H +#ifndef ASM_X86__MACH_DEFAULT__MACH_TIMER_H +#define ASM_X86__MACH_DEFAULT__MACH_TIMER_H #define CALIBRATE_TIME_MSEC 30 /* 30 msecs */ #define CALIBRATE_LATCH \ @@ -45,4 +45,4 @@ static inline void mach_countup(unsigned long *count_p) *count_p = count; } -#endif /* !_MACH_TIMER_H */ +#endif /* ASM_X86__MACH_DEFAULT__MACH_TIMER_H */ diff --git a/include/asm-x86/mach-default/mach_traps.h b/include/asm-x86/mach-default/mach_traps.h index 2fe7705..de9ac3f 100644 --- a/include/asm-x86/mach-default/mach_traps.h +++ b/include/asm-x86/mach-default/mach_traps.h @@ -2,8 +2,8 @@ * Machine specific NMI handling for generic. * Split out from traps.c by Osamu Tomita */ -#ifndef _MACH_TRAPS_H -#define _MACH_TRAPS_H +#ifndef ASM_X86__MACH_DEFAULT__MACH_TRAPS_H +#define ASM_X86__MACH_DEFAULT__MACH_TRAPS_H #include @@ -36,4 +36,4 @@ static inline void reassert_nmi(void) unlock_cmos(); } -#endif /* !_MACH_TRAPS_H */ +#endif /* ASM_X86__MACH_DEFAULT__MACH_TRAPS_H */ diff --git a/include/asm-x86/mach-default/mach_wakecpu.h b/include/asm-x86/mach-default/mach_wakecpu.h index 3ebb178..361b810 100644 --- a/include/asm-x86/mach-default/mach_wakecpu.h +++ b/include/asm-x86/mach-default/mach_wakecpu.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MACH_WAKECPU_H -#define __ASM_MACH_WAKECPU_H +#ifndef ASM_X86__MACH_DEFAULT__MACH_WAKECPU_H +#define ASM_X86__MACH_DEFAULT__MACH_WAKECPU_H /* * This file copes with machines that wakeup secondary CPUs by the @@ -39,4 +39,4 @@ static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) #define inquire_remote_apic(apicid) {} #endif -#endif /* __ASM_MACH_WAKECPU_H */ +#endif /* ASM_X86__MACH_DEFAULT__MACH_WAKECPU_H */ diff --git a/include/asm-x86/mach-es7000/mach_apic.h b/include/asm-x86/mach-es7000/mach_apic.h index 0a3fdf9..c1f6f68 100644 --- a/include/asm-x86/mach-es7000/mach_apic.h +++ b/include/asm-x86/mach-es7000/mach_apic.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MACH_APIC_H -#define __ASM_MACH_APIC_H +#ifndef ASM_X86__MACH_ES7000__MACH_APIC_H +#define ASM_X86__MACH_ES7000__MACH_APIC_H #define xapic_phys_to_log_apicid(cpu) per_cpu(x86_bios_cpu_apicid, cpu) #define esr_disable (1) @@ -191,4 +191,4 @@ static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) return cpuid_apic >> index_msb; } -#endif /* __ASM_MACH_APIC_H */ +#endif /* ASM_X86__MACH_ES7000__MACH_APIC_H */ diff --git a/include/asm-x86/mach-es7000/mach_apicdef.h b/include/asm-x86/mach-es7000/mach_apicdef.h index a58ab5a..a07e567 100644 --- a/include/asm-x86/mach-es7000/mach_apicdef.h +++ b/include/asm-x86/mach-es7000/mach_apicdef.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MACH_APICDEF_H -#define __ASM_MACH_APICDEF_H +#ifndef ASM_X86__MACH_ES7000__MACH_APICDEF_H +#define ASM_X86__MACH_ES7000__MACH_APICDEF_H #define APIC_ID_MASK (0xFF<<24) @@ -10,4 +10,4 @@ static inline unsigned get_apic_id(unsigned long x) #define GET_APIC_ID(x) get_apic_id(x) -#endif +#endif /* ASM_X86__MACH_ES7000__MACH_APICDEF_H */ diff --git a/include/asm-x86/mach-es7000/mach_ipi.h b/include/asm-x86/mach-es7000/mach_ipi.h index 5e61bd2..3a21240 100644 --- a/include/asm-x86/mach-es7000/mach_ipi.h +++ b/include/asm-x86/mach-es7000/mach_ipi.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MACH_IPI_H -#define __ASM_MACH_IPI_H +#ifndef ASM_X86__MACH_ES7000__MACH_IPI_H +#define ASM_X86__MACH_ES7000__MACH_IPI_H void send_IPI_mask_sequence(cpumask_t mask, int vector); @@ -21,4 +21,4 @@ static inline void send_IPI_all(int vector) send_IPI_mask(cpu_online_map, vector); } -#endif /* __ASM_MACH_IPI_H */ +#endif /* ASM_X86__MACH_ES7000__MACH_IPI_H */ diff --git a/include/asm-x86/mach-es7000/mach_mpparse.h b/include/asm-x86/mach-es7000/mach_mpparse.h index ef26d35..befde24 100644 --- a/include/asm-x86/mach-es7000/mach_mpparse.h +++ b/include/asm-x86/mach-es7000/mach_mpparse.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MACH_MPPARSE_H -#define __ASM_MACH_MPPARSE_H +#ifndef ASM_X86__MACH_ES7000__MACH_MPPARSE_H +#define ASM_X86__MACH_ES7000__MACH_MPPARSE_H #include @@ -26,4 +26,4 @@ static inline int es7000_check_dsdt(void) } #endif -#endif /* __ASM_MACH_MPPARSE_H */ +#endif /* ASM_X86__MACH_ES7000__MACH_MPPARSE_H */ diff --git a/include/asm-x86/mach-es7000/mach_wakecpu.h b/include/asm-x86/mach-es7000/mach_wakecpu.h index 84ff583..97c776c 100644 --- a/include/asm-x86/mach-es7000/mach_wakecpu.h +++ b/include/asm-x86/mach-es7000/mach_wakecpu.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MACH_WAKECPU_H -#define __ASM_MACH_WAKECPU_H +#ifndef ASM_X86__MACH_ES7000__MACH_WAKECPU_H +#define ASM_X86__MACH_ES7000__MACH_WAKECPU_H /* * This file copes with machines that wakeup secondary CPUs by the @@ -56,4 +56,4 @@ static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) #define inquire_remote_apic(apicid) {} #endif -#endif /* __ASM_MACH_WAKECPU_H */ +#endif /* ASM_X86__MACH_ES7000__MACH_WAKECPU_H */ diff --git a/include/asm-x86/mach-generic/gpio.h b/include/asm-x86/mach-generic/gpio.h index 5305dcb..6ce0f77 100644 --- a/include/asm-x86/mach-generic/gpio.h +++ b/include/asm-x86/mach-generic/gpio.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MACH_GENERIC_GPIO_H -#define __ASM_MACH_GENERIC_GPIO_H +#ifndef ASM_X86__MACH_GENERIC__GPIO_H +#define ASM_X86__MACH_GENERIC__GPIO_H int gpio_request(unsigned gpio, const char *label); void gpio_free(unsigned gpio); @@ -12,4 +12,4 @@ int irq_to_gpio(unsigned irq); #include /* cansleep wrappers */ -#endif /* __ASM_MACH_GENERIC_GPIO_H */ +#endif /* ASM_X86__MACH_GENERIC__GPIO_H */ diff --git a/include/asm-x86/mach-generic/irq_vectors_limits.h b/include/asm-x86/mach-generic/irq_vectors_limits.h index 890ce3f..f7870e1 100644 --- a/include/asm-x86/mach-generic/irq_vectors_limits.h +++ b/include/asm-x86/mach-generic/irq_vectors_limits.h @@ -1,5 +1,5 @@ -#ifndef _ASM_IRQ_VECTORS_LIMITS_H -#define _ASM_IRQ_VECTORS_LIMITS_H +#ifndef ASM_X86__MACH_GENERIC__IRQ_VECTORS_LIMITS_H +#define ASM_X86__MACH_GENERIC__IRQ_VECTORS_LIMITS_H /* * For Summit or generic (i.e. installer) kernels, we have lots of I/O APICs, @@ -11,4 +11,4 @@ #define NR_IRQS 224 #define NR_IRQ_VECTORS 1024 -#endif /* _ASM_IRQ_VECTORS_LIMITS_H */ +#endif /* ASM_X86__MACH_GENERIC__IRQ_VECTORS_LIMITS_H */ diff --git a/include/asm-x86/mach-generic/mach_apic.h b/include/asm-x86/mach-generic/mach_apic.h index 6eff343..5d010c6 100644 --- a/include/asm-x86/mach-generic/mach_apic.h +++ b/include/asm-x86/mach-generic/mach_apic.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MACH_APIC_H -#define __ASM_MACH_APIC_H +#ifndef ASM_X86__MACH_GENERIC__MACH_APIC_H +#define ASM_X86__MACH_GENERIC__MACH_APIC_H #include @@ -29,4 +29,4 @@ extern void generic_bigsmp_probe(void); -#endif /* __ASM_MACH_APIC_H */ +#endif /* ASM_X86__MACH_GENERIC__MACH_APIC_H */ diff --git a/include/asm-x86/mach-generic/mach_apicdef.h b/include/asm-x86/mach-generic/mach_apicdef.h index 28ed989..1657f38 100644 --- a/include/asm-x86/mach-generic/mach_apicdef.h +++ b/include/asm-x86/mach-generic/mach_apicdef.h @@ -1,5 +1,5 @@ -#ifndef _GENAPIC_MACH_APICDEF_H -#define _GENAPIC_MACH_APICDEF_H 1 +#ifndef ASM_X86__MACH_GENERIC__MACH_APICDEF_H +#define ASM_X86__MACH_GENERIC__MACH_APICDEF_H #ifndef APIC_DEFINITION #include @@ -8,4 +8,4 @@ #define APIC_ID_MASK (genapic->apic_id_mask) #endif -#endif +#endif /* ASM_X86__MACH_GENERIC__MACH_APICDEF_H */ diff --git a/include/asm-x86/mach-generic/mach_ipi.h b/include/asm-x86/mach-generic/mach_ipi.h index 441b0fe..f67433d 100644 --- a/include/asm-x86/mach-generic/mach_ipi.h +++ b/include/asm-x86/mach-generic/mach_ipi.h @@ -1,5 +1,5 @@ -#ifndef _MACH_IPI_H -#define _MACH_IPI_H 1 +#ifndef ASM_X86__MACH_GENERIC__MACH_IPI_H +#define ASM_X86__MACH_GENERIC__MACH_IPI_H #include @@ -7,4 +7,4 @@ #define send_IPI_allbutself (genapic->send_IPI_allbutself) #define send_IPI_all (genapic->send_IPI_all) -#endif +#endif /* ASM_X86__MACH_GENERIC__MACH_IPI_H */ diff --git a/include/asm-x86/mach-generic/mach_mpparse.h b/include/asm-x86/mach-generic/mach_mpparse.h index 586cadb..3115564 100644 --- a/include/asm-x86/mach-generic/mach_mpparse.h +++ b/include/asm-x86/mach-generic/mach_mpparse.h @@ -1,5 +1,5 @@ -#ifndef _MACH_MPPARSE_H -#define _MACH_MPPARSE_H 1 +#ifndef ASM_X86__MACH_GENERIC__MACH_MPPARSE_H +#define ASM_X86__MACH_GENERIC__MACH_MPPARSE_H extern int mps_oem_check(struct mp_config_table *mpc, char *oem, @@ -7,4 +7,4 @@ extern int mps_oem_check(struct mp_config_table *mpc, char *oem, extern int acpi_madt_oem_check(char *oem_id, char *oem_table_id); -#endif +#endif /* ASM_X86__MACH_GENERIC__MACH_MPPARSE_H */ diff --git a/include/asm-x86/mach-generic/mach_mpspec.h b/include/asm-x86/mach-generic/mach_mpspec.h index c83c120..6061b15 100644 --- a/include/asm-x86/mach-generic/mach_mpspec.h +++ b/include/asm-x86/mach-generic/mach_mpspec.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MACH_MPSPEC_H -#define __ASM_MACH_MPSPEC_H +#ifndef ASM_X86__MACH_GENERIC__MACH_MPSPEC_H +#define ASM_X86__MACH_GENERIC__MACH_MPSPEC_H #define MAX_IRQ_SOURCES 256 @@ -9,4 +9,4 @@ extern void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, char *productid); -#endif /* __ASM_MACH_MPSPEC_H */ +#endif /* ASM_X86__MACH_GENERIC__MACH_MPSPEC_H */ diff --git a/include/asm-x86/mach-numaq/mach_apic.h b/include/asm-x86/mach-numaq/mach_apic.h index d802465..7a0d39e 100644 --- a/include/asm-x86/mach-numaq/mach_apic.h +++ b/include/asm-x86/mach-numaq/mach_apic.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MACH_APIC_H -#define __ASM_MACH_APIC_H +#ifndef ASM_X86__MACH_NUMAQ__MACH_APIC_H +#define ASM_X86__MACH_NUMAQ__MACH_APIC_H #include #include @@ -135,4 +135,4 @@ static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) return cpuid_apic >> index_msb; } -#endif /* __ASM_MACH_APIC_H */ +#endif /* ASM_X86__MACH_NUMAQ__MACH_APIC_H */ diff --git a/include/asm-x86/mach-numaq/mach_apicdef.h b/include/asm-x86/mach-numaq/mach_apicdef.h index bf439d0..f870ec5 100644 --- a/include/asm-x86/mach-numaq/mach_apicdef.h +++ b/include/asm-x86/mach-numaq/mach_apicdef.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MACH_APICDEF_H -#define __ASM_MACH_APICDEF_H +#ifndef ASM_X86__MACH_NUMAQ__MACH_APICDEF_H +#define ASM_X86__MACH_NUMAQ__MACH_APICDEF_H #define APIC_ID_MASK (0xF<<24) @@ -11,4 +11,4 @@ static inline unsigned get_apic_id(unsigned long x) #define GET_APIC_ID(x) get_apic_id(x) -#endif +#endif /* ASM_X86__MACH_NUMAQ__MACH_APICDEF_H */ diff --git a/include/asm-x86/mach-numaq/mach_ipi.h b/include/asm-x86/mach-numaq/mach_ipi.h index c604448..1e83582 100644 --- a/include/asm-x86/mach-numaq/mach_ipi.h +++ b/include/asm-x86/mach-numaq/mach_ipi.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MACH_IPI_H -#define __ASM_MACH_IPI_H +#ifndef ASM_X86__MACH_NUMAQ__MACH_IPI_H +#define ASM_X86__MACH_NUMAQ__MACH_IPI_H void send_IPI_mask_sequence(cpumask_t, int vector); @@ -22,4 +22,4 @@ static inline void send_IPI_all(int vector) send_IPI_mask(cpu_online_map, vector); } -#endif /* __ASM_MACH_IPI_H */ +#endif /* ASM_X86__MACH_NUMAQ__MACH_IPI_H */ diff --git a/include/asm-x86/mach-numaq/mach_mpparse.h b/include/asm-x86/mach-numaq/mach_mpparse.h index 626aef6..74ade18 100644 --- a/include/asm-x86/mach-numaq/mach_mpparse.h +++ b/include/asm-x86/mach-numaq/mach_mpparse.h @@ -1,7 +1,7 @@ -#ifndef __ASM_MACH_MPPARSE_H -#define __ASM_MACH_MPPARSE_H +#ifndef ASM_X86__MACH_NUMAQ__MACH_MPPARSE_H +#define ASM_X86__MACH_NUMAQ__MACH_MPPARSE_H extern void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, char *productid); -#endif /* __ASM_MACH_MPPARSE_H */ +#endif /* ASM_X86__MACH_NUMAQ__MACH_MPPARSE_H */ diff --git a/include/asm-x86/mach-numaq/mach_wakecpu.h b/include/asm-x86/mach-numaq/mach_wakecpu.h index 0053004..0db8cea 100644 --- a/include/asm-x86/mach-numaq/mach_wakecpu.h +++ b/include/asm-x86/mach-numaq/mach_wakecpu.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MACH_WAKECPU_H -#define __ASM_MACH_WAKECPU_H +#ifndef ASM_X86__MACH_NUMAQ__MACH_WAKECPU_H +#define ASM_X86__MACH_NUMAQ__MACH_WAKECPU_H /* This file copes with machines that wakeup secondary CPUs by NMIs */ @@ -40,4 +40,4 @@ static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) #define inquire_remote_apic(apicid) {} -#endif /* __ASM_MACH_WAKECPU_H */ +#endif /* ASM_X86__MACH_NUMAQ__MACH_WAKECPU_H */ diff --git a/include/asm-x86/mach-rdc321x/gpio.h b/include/asm-x86/mach-rdc321x/gpio.h index acce0b7..61845619 100644 --- a/include/asm-x86/mach-rdc321x/gpio.h +++ b/include/asm-x86/mach-rdc321x/gpio.h @@ -1,5 +1,5 @@ -#ifndef _RDC321X_GPIO_H -#define _RDC321X_GPIO_H +#ifndef ASM_X86__MACH_RDC321X__GPIO_H +#define ASM_X86__MACH_RDC321X__GPIO_H extern int rdc_gpio_get_value(unsigned gpio); extern void rdc_gpio_set_value(unsigned gpio, int value); @@ -54,4 +54,4 @@ static inline int irq_to_gpio(unsigned irq) /* For cansleep */ #include -#endif /* _RDC321X_GPIO_H_ */ +#endif /* ASM_X86__MACH_RDC321X__GPIO_H */ diff --git a/include/asm-x86/mach-summit/irq_vectors_limits.h b/include/asm-x86/mach-summit/irq_vectors_limits.h index 890ce3f..22f376a 100644 --- a/include/asm-x86/mach-summit/irq_vectors_limits.h +++ b/include/asm-x86/mach-summit/irq_vectors_limits.h @@ -1,5 +1,5 @@ -#ifndef _ASM_IRQ_VECTORS_LIMITS_H -#define _ASM_IRQ_VECTORS_LIMITS_H +#ifndef ASM_X86__MACH_SUMMIT__IRQ_VECTORS_LIMITS_H +#define ASM_X86__MACH_SUMMIT__IRQ_VECTORS_LIMITS_H /* * For Summit or generic (i.e. installer) kernels, we have lots of I/O APICs, @@ -11,4 +11,4 @@ #define NR_IRQS 224 #define NR_IRQ_VECTORS 1024 -#endif /* _ASM_IRQ_VECTORS_LIMITS_H */ +#endif /* ASM_X86__MACH_SUMMIT__IRQ_VECTORS_LIMITS_H */ diff --git a/include/asm-x86/mach-summit/mach_apic.h b/include/asm-x86/mach-summit/mach_apic.h index 75d2c950..ef77af3 100644 --- a/include/asm-x86/mach-summit/mach_apic.h +++ b/include/asm-x86/mach-summit/mach_apic.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MACH_APIC_H -#define __ASM_MACH_APIC_H +#ifndef ASM_X86__MACH_SUMMIT__MACH_APIC_H +#define ASM_X86__MACH_SUMMIT__MACH_APIC_H #include @@ -182,4 +182,4 @@ static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) return hard_smp_processor_id() >> index_msb; } -#endif /* __ASM_MACH_APIC_H */ +#endif /* ASM_X86__MACH_SUMMIT__MACH_APIC_H */ diff --git a/include/asm-x86/mach-summit/mach_apicdef.h b/include/asm-x86/mach-summit/mach_apicdef.h index a58ab5a..d4bc859 100644 --- a/include/asm-x86/mach-summit/mach_apicdef.h +++ b/include/asm-x86/mach-summit/mach_apicdef.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MACH_APICDEF_H -#define __ASM_MACH_APICDEF_H +#ifndef ASM_X86__MACH_SUMMIT__MACH_APICDEF_H +#define ASM_X86__MACH_SUMMIT__MACH_APICDEF_H #define APIC_ID_MASK (0xFF<<24) @@ -10,4 +10,4 @@ static inline unsigned get_apic_id(unsigned long x) #define GET_APIC_ID(x) get_apic_id(x) -#endif +#endif /* ASM_X86__MACH_SUMMIT__MACH_APICDEF_H */ diff --git a/include/asm-x86/mach-summit/mach_ipi.h b/include/asm-x86/mach-summit/mach_ipi.h index 9404c53..a3b31c5 100644 --- a/include/asm-x86/mach-summit/mach_ipi.h +++ b/include/asm-x86/mach-summit/mach_ipi.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MACH_IPI_H -#define __ASM_MACH_IPI_H +#ifndef ASM_X86__MACH_SUMMIT__MACH_IPI_H +#define ASM_X86__MACH_SUMMIT__MACH_IPI_H void send_IPI_mask_sequence(cpumask_t mask, int vector); @@ -22,4 +22,4 @@ static inline void send_IPI_all(int vector) send_IPI_mask(cpu_online_map, vector); } -#endif /* __ASM_MACH_IPI_H */ +#endif /* ASM_X86__MACH_SUMMIT__MACH_IPI_H */ diff --git a/include/asm-x86/mach-summit/mach_mpparse.h b/include/asm-x86/mach-summit/mach_mpparse.h index fdf5917..92396f2 100644 --- a/include/asm-x86/mach-summit/mach_mpparse.h +++ b/include/asm-x86/mach-summit/mach_mpparse.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MACH_MPPARSE_H -#define __ASM_MACH_MPPARSE_H +#ifndef ASM_X86__MACH_SUMMIT__MACH_MPPARSE_H +#define ASM_X86__MACH_SUMMIT__MACH_MPPARSE_H #include #include @@ -107,4 +107,4 @@ static inline int is_WPEG(struct rio_detail *rio){ rio->type == LookOutAWPEG || rio->type == LookOutBWPEG); } -#endif /* __ASM_MACH_MPPARSE_H */ +#endif /* ASM_X86__MACH_SUMMIT__MACH_MPPARSE_H */ diff --git a/include/asm-x86/math_emu.h b/include/asm-x86/math_emu.h index 9bf4ae9..5768d8e 100644 --- a/include/asm-x86/math_emu.h +++ b/include/asm-x86/math_emu.h @@ -1,5 +1,5 @@ -#ifndef _I386_MATH_EMU_H -#define _I386_MATH_EMU_H +#ifndef ASM_X86__MATH_EMU_H +#define ASM_X86__MATH_EMU_H /* This structure matches the layout of the data saved to the stack following a device-not-present interrupt, part of it saved @@ -28,4 +28,4 @@ struct info { long ___vm86_fs; long ___vm86_gs; }; -#endif +#endif /* ASM_X86__MATH_EMU_H */ diff --git a/include/asm-x86/mc146818rtc.h b/include/asm-x86/mc146818rtc.h index daf1ccd..a995f33 100644 --- a/include/asm-x86/mc146818rtc.h +++ b/include/asm-x86/mc146818rtc.h @@ -1,8 +1,8 @@ /* * Machine dependent access functions for RTC registers. */ -#ifndef _ASM_MC146818RTC_H -#define _ASM_MC146818RTC_H +#ifndef ASM_X86__MC146818RTC_H +#define ASM_X86__MC146818RTC_H #include #include @@ -101,4 +101,4 @@ extern unsigned long mach_get_cmos_time(void); #define RTC_IRQ 8 -#endif /* _ASM_MC146818RTC_H */ +#endif /* ASM_X86__MC146818RTC_H */ diff --git a/include/asm-x86/mca.h b/include/asm-x86/mca.h index 09adf2e..60d1ed2 100644 --- a/include/asm-x86/mca.h +++ b/include/asm-x86/mca.h @@ -1,8 +1,8 @@ /* -*- mode: c; c-basic-offset: 8 -*- */ /* Platform specific MCA defines */ -#ifndef _ASM_MCA_H -#define _ASM_MCA_H +#ifndef ASM_X86__MCA_H +#define ASM_X86__MCA_H /* Maximal number of MCA slots - actually, some machines have less, but * they all have sufficient number of POS registers to cover 8. @@ -40,4 +40,4 @@ */ #define MCA_NUMADAPTERS (MCA_MAX_SLOT_NR+3) -#endif +#endif /* ASM_X86__MCA_H */ diff --git a/include/asm-x86/mca_dma.h b/include/asm-x86/mca_dma.h index c3dca6e..49f22be 100644 --- a/include/asm-x86/mca_dma.h +++ b/include/asm-x86/mca_dma.h @@ -1,5 +1,5 @@ -#ifndef MCA_DMA_H -#define MCA_DMA_H +#ifndef ASM_X86__MCA_DMA_H +#define ASM_X86__MCA_DMA_H #include #include @@ -198,4 +198,4 @@ static inline void mca_set_dma_mode(unsigned int dmanr, unsigned int mode) outb(mode, MCA_DMA_REG_EXE); } -#endif /* MCA_DMA_H */ +#endif /* ASM_X86__MCA_DMA_H */ diff --git a/include/asm-x86/mce.h b/include/asm-x86/mce.h index 94f1fd7..6a580f2 100644 --- a/include/asm-x86/mce.h +++ b/include/asm-x86/mce.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_MCE_H -#define _ASM_X86_MCE_H +#ifndef ASM_X86__MCE_H +#define ASM_X86__MCE_H #ifdef __x86_64__ @@ -126,4 +126,4 @@ extern void restart_mce(void); #endif /* __KERNEL__ */ -#endif +#endif /* ASM_X86__MCE_H */ diff --git a/include/asm-x86/mman.h b/include/asm-x86/mman.h index c1682b5..b6b41aa 100644 --- a/include/asm-x86/mman.h +++ b/include/asm-x86/mman.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_MMAN_H -#define _ASM_X86_MMAN_H +#ifndef ASM_X86__MMAN_H +#define ASM_X86__MMAN_H #include @@ -16,4 +16,4 @@ #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ -#endif /* _ASM_X86_MMAN_H */ +#endif /* ASM_X86__MMAN_H */ diff --git a/include/asm-x86/mmconfig.h b/include/asm-x86/mmconfig.h index 95beda0..8689f1e 100644 --- a/include/asm-x86/mmconfig.h +++ b/include/asm-x86/mmconfig.h @@ -1,5 +1,5 @@ -#ifndef _ASM_MMCONFIG_H -#define _ASM_MMCONFIG_H +#ifndef ASM_X86__MMCONFIG_H +#define ASM_X86__MMCONFIG_H #ifdef CONFIG_PCI_MMCONFIG extern void __cpuinit fam10h_check_enable_mmcfg(void); @@ -9,4 +9,4 @@ static inline void fam10h_check_enable_mmcfg(void) { } static inline void check_enable_amd_mmconf_dmi(void) { } #endif -#endif +#endif /* ASM_X86__MMCONFIG_H */ diff --git a/include/asm-x86/mmu.h b/include/asm-x86/mmu.h index 00e8867..a30d7a9 100644 --- a/include/asm-x86/mmu.h +++ b/include/asm-x86/mmu.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_MMU_H -#define _ASM_X86_MMU_H +#ifndef ASM_X86__MMU_H +#define ASM_X86__MMU_H #include #include @@ -28,4 +28,4 @@ static inline void leave_mm(int cpu) } #endif -#endif /* _ASM_X86_MMU_H */ +#endif /* ASM_X86__MMU_H */ diff --git a/include/asm-x86/mmu_context.h b/include/asm-x86/mmu_context.h index fac5701..8ec940b 100644 --- a/include/asm-x86/mmu_context.h +++ b/include/asm-x86/mmu_context.h @@ -1,5 +1,5 @@ -#ifndef __ASM_X86_MMU_CONTEXT_H -#define __ASM_X86_MMU_CONTEXT_H +#ifndef ASM_X86__MMU_CONTEXT_H +#define ASM_X86__MMU_CONTEXT_H #include #include @@ -34,4 +34,4 @@ do { \ } while (0); -#endif /* __ASM_X86_MMU_CONTEXT_H */ +#endif /* ASM_X86__MMU_CONTEXT_H */ diff --git a/include/asm-x86/mmu_context_32.h b/include/asm-x86/mmu_context_32.h index 824fc57..cce6f6e 100644 --- a/include/asm-x86/mmu_context_32.h +++ b/include/asm-x86/mmu_context_32.h @@ -1,5 +1,5 @@ -#ifndef __I386_SCHED_H -#define __I386_SCHED_H +#ifndef ASM_X86__MMU_CONTEXT_32_H +#define ASM_X86__MMU_CONTEXT_32_H static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { @@ -53,4 +53,4 @@ static inline void switch_mm(struct mm_struct *prev, #define deactivate_mm(tsk, mm) \ asm("movl %0,%%gs": :"r" (0)); -#endif +#endif /* ASM_X86__MMU_CONTEXT_32_H */ diff --git a/include/asm-x86/mmu_context_64.h b/include/asm-x86/mmu_context_64.h index c700063..2675867 100644 --- a/include/asm-x86/mmu_context_64.h +++ b/include/asm-x86/mmu_context_64.h @@ -1,5 +1,5 @@ -#ifndef __X86_64_MMU_CONTEXT_H -#define __X86_64_MMU_CONTEXT_H +#ifndef ASM_X86__MMU_CONTEXT_64_H +#define ASM_X86__MMU_CONTEXT_64_H #include @@ -51,4 +51,4 @@ do { \ asm volatile("movl %0,%%fs"::"r"(0)); \ } while (0) -#endif +#endif /* ASM_X86__MMU_CONTEXT_64_H */ diff --git a/include/asm-x86/mmx.h b/include/asm-x86/mmx.h index 9408812..2e7299b 100644 --- a/include/asm-x86/mmx.h +++ b/include/asm-x86/mmx.h @@ -1,5 +1,5 @@ -#ifndef _ASM_MMX_H -#define _ASM_MMX_H +#ifndef ASM_X86__MMX_H +#define ASM_X86__MMX_H /* * MMX 3Dnow! helper operations @@ -11,4 +11,4 @@ extern void *_mmx_memcpy(void *to, const void *from, size_t size); extern void mmx_clear_page(void *page); extern void mmx_copy_page(void *to, void *from); -#endif +#endif /* ASM_X86__MMX_H */ diff --git a/include/asm-x86/mmzone_32.h b/include/asm-x86/mmzone_32.h index b2298a2..b98590f 100644 --- a/include/asm-x86/mmzone_32.h +++ b/include/asm-x86/mmzone_32.h @@ -3,8 +3,8 @@ * */ -#ifndef _ASM_MMZONE_H_ -#define _ASM_MMZONE_H_ +#ifndef ASM_X86__MMZONE_32_H +#define ASM_X86__MMZONE_32_H #include @@ -125,4 +125,4 @@ static inline int pfn_valid(int pfn) }) #endif /* CONFIG_NEED_MULTIPLE_NODES */ -#endif /* _ASM_MMZONE_H_ */ +#endif /* ASM_X86__MMZONE_32_H */ diff --git a/include/asm-x86/mmzone_64.h b/include/asm-x86/mmzone_64.h index 594bd0d..626b03a 100644 --- a/include/asm-x86/mmzone_64.h +++ b/include/asm-x86/mmzone_64.h @@ -1,8 +1,8 @@ /* K8 NUMA support */ /* Copyright 2002,2003 by Andi Kleen, SuSE Labs */ /* 2.5 Version loosely based on the NUMAQ Code by Pat Gaughen. */ -#ifndef _ASM_X86_64_MMZONE_H -#define _ASM_X86_64_MMZONE_H 1 +#ifndef ASM_X86__MMZONE_64_H +#define ASM_X86__MMZONE_64_H #ifdef CONFIG_NUMA @@ -49,4 +49,4 @@ extern int early_pfn_to_nid(unsigned long pfn); #endif #endif -#endif +#endif /* ASM_X86__MMZONE_64_H */ diff --git a/include/asm-x86/module.h b/include/asm-x86/module.h index bfedb24..48dc3e0 100644 --- a/include/asm-x86/module.h +++ b/include/asm-x86/module.h @@ -1,5 +1,5 @@ -#ifndef _ASM_MODULE_H -#define _ASM_MODULE_H +#ifndef ASM_X86__MODULE_H +#define ASM_X86__MODULE_H /* x86_32/64 are simple */ struct mod_arch_specific {}; @@ -79,4 +79,4 @@ struct mod_arch_specific {}; # define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY MODULE_STACKSIZE #endif -#endif /* _ASM_MODULE_H */ +#endif /* ASM_X86__MODULE_H */ diff --git a/include/asm-x86/mpspec.h b/include/asm-x86/mpspec.h index b6995e5..118da36 100644 --- a/include/asm-x86/mpspec.h +++ b/include/asm-x86/mpspec.h @@ -1,5 +1,5 @@ -#ifndef _AM_X86_MPSPEC_H -#define _AM_X86_MPSPEC_H +#ifndef ASM_X86__MPSPEC_H +#define ASM_X86__MPSPEC_H #include @@ -141,4 +141,4 @@ static inline void physid_set_mask_of_physid(int physid, physid_mask_t *map) extern physid_mask_t phys_cpu_present_map; -#endif +#endif /* ASM_X86__MPSPEC_H */ diff --git a/include/asm-x86/mpspec_def.h b/include/asm-x86/mpspec_def.h index 38d1e73..79166b0 100644 --- a/include/asm-x86/mpspec_def.h +++ b/include/asm-x86/mpspec_def.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MPSPEC_DEF_H -#define __ASM_MPSPEC_DEF_H +#ifndef ASM_X86__MPSPEC_DEF_H +#define ASM_X86__MPSPEC_DEF_H /* * Structure definitions for SMP machines following the @@ -177,4 +177,4 @@ enum mp_bustype { MP_BUS_PCI, MP_BUS_MCA, }; -#endif +#endif /* ASM_X86__MPSPEC_DEF_H */ diff --git a/include/asm-x86/msgbuf.h b/include/asm-x86/msgbuf.h index 7e4e948..1b538c9 100644 --- a/include/asm-x86/msgbuf.h +++ b/include/asm-x86/msgbuf.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_MSGBUF_H -#define _ASM_X86_MSGBUF_H +#ifndef ASM_X86__MSGBUF_H +#define ASM_X86__MSGBUF_H /* * The msqid64_ds structure for i386 architecture. @@ -36,4 +36,4 @@ struct msqid64_ds { unsigned long __unused5; }; -#endif /* _ASM_X86_MSGBUF_H */ +#endif /* ASM_X86__MSGBUF_H */ diff --git a/include/asm-x86/msidef.h b/include/asm-x86/msidef.h index 296f29c..3139666 100644 --- a/include/asm-x86/msidef.h +++ b/include/asm-x86/msidef.h @@ -1,5 +1,5 @@ -#ifndef ASM_MSIDEF_H -#define ASM_MSIDEF_H +#ifndef ASM_X86__MSIDEF_H +#define ASM_X86__MSIDEF_H /* * Constants for Intel APIC based MSI messages. @@ -48,4 +48,4 @@ #define MSI_ADDR_DEST_ID(dest) (((dest) << MSI_ADDR_DEST_ID_SHIFT) & \ MSI_ADDR_DEST_ID_MASK) -#endif /* ASM_MSIDEF_H */ +#endif /* ASM_X86__MSIDEF_H */ diff --git a/include/asm-x86/msr-index.h b/include/asm-x86/msr-index.h index 44bce77..3052f05 100644 --- a/include/asm-x86/msr-index.h +++ b/include/asm-x86/msr-index.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MSR_INDEX_H -#define __ASM_MSR_INDEX_H +#ifndef ASM_X86__MSR_INDEX_H +#define ASM_X86__MSR_INDEX_H /* CPU model specific register (MSR) numbers */ @@ -310,4 +310,4 @@ /* Geode defined MSRs */ #define MSR_GEODE_BUSCONT_CONF0 0x00001900 -#endif /* __ASM_MSR_INDEX_H */ +#endif /* ASM_X86__MSR_INDEX_H */ diff --git a/include/asm-x86/msr.h b/include/asm-x86/msr.h index ca110ee..0329920 100644 --- a/include/asm-x86/msr.h +++ b/include/asm-x86/msr.h @@ -1,5 +1,5 @@ -#ifndef __ASM_X86_MSR_H_ -#define __ASM_X86_MSR_H_ +#ifndef ASM_X86__MSR_H +#define ASM_X86__MSR_H #include @@ -220,4 +220,4 @@ static inline int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) #endif /* __KERNEL__ */ -#endif +#endif /* ASM_X86__MSR_H */ diff --git a/include/asm-x86/mtrr.h b/include/asm-x86/mtrr.h index a69a01a..23a7f83 100644 --- a/include/asm-x86/mtrr.h +++ b/include/asm-x86/mtrr.h @@ -20,8 +20,8 @@ The postal address is: Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia. */ -#ifndef _ASM_X86_MTRR_H -#define _ASM_X86_MTRR_H +#ifndef ASM_X86__MTRR_H +#define ASM_X86__MTRR_H #include #include @@ -170,4 +170,4 @@ struct mtrr_gentry32 { #endif /* __KERNEL__ */ -#endif /* _ASM_X86_MTRR_H */ +#endif /* ASM_X86__MTRR_H */ diff --git a/include/asm-x86/mutex_32.h b/include/asm-x86/mutex_32.h index 73e928e..25c16d8 100644 --- a/include/asm-x86/mutex_32.h +++ b/include/asm-x86/mutex_32.h @@ -6,8 +6,8 @@ * * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar */ -#ifndef _ASM_MUTEX_H -#define _ASM_MUTEX_H +#ifndef ASM_X86__MUTEX_32_H +#define ASM_X86__MUTEX_32_H #include @@ -122,4 +122,4 @@ static inline int __mutex_fastpath_trylock(atomic_t *count, #endif } -#endif +#endif /* ASM_X86__MUTEX_32_H */ diff --git a/include/asm-x86/mutex_64.h b/include/asm-x86/mutex_64.h index f3fae9b..918ba21 100644 --- a/include/asm-x86/mutex_64.h +++ b/include/asm-x86/mutex_64.h @@ -6,8 +6,8 @@ * * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar */ -#ifndef _ASM_MUTEX_H -#define _ASM_MUTEX_H +#ifndef ASM_X86__MUTEX_64_H +#define ASM_X86__MUTEX_64_H /** * __mutex_fastpath_lock - decrement and call function if negative @@ -97,4 +97,4 @@ static inline int __mutex_fastpath_trylock(atomic_t *count, return 0; } -#endif +#endif /* ASM_X86__MUTEX_64_H */ diff --git a/include/asm-x86/namei.h b/include/asm-x86/namei.h index 415ef5d..6e5de58 100644 --- a/include/asm-x86/namei.h +++ b/include/asm-x86/namei.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_NAMEI_H -#define _ASM_X86_NAMEI_H +#ifndef ASM_X86__NAMEI_H +#define ASM_X86__NAMEI_H /* This dummy routine maybe changed to something useful * for /usr/gnemul/ emulation stuff. @@ -8,4 +8,4 @@ #define __emul_prefix() NULL -#endif /* _ASM_X86_NAMEI_H */ +#endif /* ASM_X86__NAMEI_H */ diff --git a/include/asm-x86/nmi.h b/include/asm-x86/nmi.h index 21f8d02..f8b76f3 100644 --- a/include/asm-x86/nmi.h +++ b/include/asm-x86/nmi.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_NMI_H_ -#define _ASM_X86_NMI_H_ +#ifndef ASM_X86__NMI_H +#define ASM_X86__NMI_H #include #include @@ -81,4 +81,4 @@ void enable_lapic_nmi_watchdog(void); void stop_nmi(void); void restart_nmi(void); -#endif +#endif /* ASM_X86__NMI_H */ diff --git a/include/asm-x86/nops.h b/include/asm-x86/nops.h index ad0bedd..ae74272 100644 --- a/include/asm-x86/nops.h +++ b/include/asm-x86/nops.h @@ -1,5 +1,5 @@ -#ifndef _ASM_NOPS_H -#define _ASM_NOPS_H 1 +#ifndef ASM_X86__NOPS_H +#define ASM_X86__NOPS_H /* Define nops for use with alternative() */ @@ -115,4 +115,4 @@ #define ASM_NOP_MAX 8 -#endif +#endif /* ASM_X86__NOPS_H */ diff --git a/include/asm-x86/numa_32.h b/include/asm-x86/numa_32.h index 220d7b7..44cb078 100644 --- a/include/asm-x86/numa_32.h +++ b/include/asm-x86/numa_32.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_32_NUMA_H -#define _ASM_X86_32_NUMA_H 1 +#ifndef ASM_X86__NUMA_32_H +#define ASM_X86__NUMA_32_H extern int pxm_to_nid(int pxm); extern void numa_remove_cpu(int cpu); @@ -8,4 +8,4 @@ extern void numa_remove_cpu(int cpu); extern void set_highmem_pages_init(void); #endif -#endif /* _ASM_X86_32_NUMA_H */ +#endif /* ASM_X86__NUMA_32_H */ diff --git a/include/asm-x86/numa_64.h b/include/asm-x86/numa_64.h index 3830094..15c9903 100644 --- a/include/asm-x86/numa_64.h +++ b/include/asm-x86/numa_64.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X8664_NUMA_H -#define _ASM_X8664_NUMA_H 1 +#ifndef ASM_X86__NUMA_64_H +#define ASM_X86__NUMA_64_H #include #include @@ -40,4 +40,4 @@ static inline void numa_add_cpu(int cpu, int node) { } static inline void numa_remove_cpu(int cpu) { } #endif -#endif +#endif /* ASM_X86__NUMA_64_H */ diff --git a/include/asm-x86/numaq.h b/include/asm-x86/numaq.h index 34b92d5..124bf7d 100644 --- a/include/asm-x86/numaq.h +++ b/include/asm-x86/numaq.h @@ -23,8 +23,8 @@ * Send feedback to */ -#ifndef NUMAQ_H -#define NUMAQ_H +#ifndef ASM_X86__NUMAQ_H +#define ASM_X86__NUMAQ_H #ifdef CONFIG_X86_NUMAQ @@ -165,5 +165,5 @@ static inline int get_memcfg_numaq(void) return 0; } #endif /* CONFIG_X86_NUMAQ */ -#endif /* NUMAQ_H */ +#endif /* ASM_X86__NUMAQ_H */ diff --git a/include/asm-x86/olpc.h b/include/asm-x86/olpc.h index 97d4713..d7328b1 100644 --- a/include/asm-x86/olpc.h +++ b/include/asm-x86/olpc.h @@ -1,7 +1,7 @@ /* OLPC machine specific definitions */ -#ifndef ASM_OLPC_H_ -#define ASM_OLPC_H_ +#ifndef ASM_X86__OLPC_H +#define ASM_X86__OLPC_H #include @@ -129,4 +129,4 @@ extern int olpc_ec_mask_unset(uint8_t bits); #define OLPC_GPIO_LID geode_gpio(26) #define OLPC_GPIO_ECSCI geode_gpio(27) -#endif +#endif /* ASM_X86__OLPC_H */ diff --git a/include/asm-x86/page.h b/include/asm-x86/page.h index 28d7b45..068a636 100644 --- a/include/asm-x86/page.h +++ b/include/asm-x86/page.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_PAGE_H -#define _ASM_X86_PAGE_H +#ifndef ASM_X86__PAGE_H +#define ASM_X86__PAGE_H #include @@ -192,4 +192,4 @@ static inline pteval_t native_pte_val(pte_t pte) #define __HAVE_ARCH_GATE_AREA 1 #endif /* __KERNEL__ */ -#endif /* _ASM_X86_PAGE_H */ +#endif /* ASM_X86__PAGE_H */ diff --git a/include/asm-x86/page_32.h b/include/asm-x86/page_32.h index ab85287..85ac2b8 100644 --- a/include/asm-x86/page_32.h +++ b/include/asm-x86/page_32.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_PAGE_32_H -#define _ASM_X86_PAGE_32_H +#ifndef ASM_X86__PAGE_32_H +#define ASM_X86__PAGE_32_H /* * This handles the memory map. @@ -126,4 +126,4 @@ static inline void copy_page(void *to, void *from) #endif /* CONFIG_X86_3DNOW */ #endif /* !__ASSEMBLY__ */ -#endif /* _ASM_X86_PAGE_32_H */ +#endif /* ASM_X86__PAGE_32_H */ diff --git a/include/asm-x86/page_64.h b/include/asm-x86/page_64.h index c6916c8..87aef3c 100644 --- a/include/asm-x86/page_64.h +++ b/include/asm-x86/page_64.h @@ -1,5 +1,5 @@ -#ifndef _X86_64_PAGE_H -#define _X86_64_PAGE_H +#ifndef ASM_X86__PAGE_64_H +#define ASM_X86__PAGE_64_H #define PAGETABLE_LEVELS 4 @@ -102,4 +102,4 @@ extern void init_extra_mapping_wb(unsigned long phys, unsigned long size); #endif -#endif /* _X86_64_PAGE_H */ +#endif /* ASM_X86__PAGE_64_H */ diff --git a/include/asm-x86/param.h b/include/asm-x86/param.h index 6f0d042..0009cfb 100644 --- a/include/asm-x86/param.h +++ b/include/asm-x86/param.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_PARAM_H -#define _ASM_X86_PARAM_H +#ifndef ASM_X86__PARAM_H +#define ASM_X86__PARAM_H #ifdef __KERNEL__ # define HZ CONFIG_HZ /* Internal kernel timer frequency */ @@ -19,4 +19,4 @@ #define MAXHOSTNAMELEN 64 /* max length of hostname */ -#endif /* _ASM_X86_PARAM_H */ +#endif /* ASM_X86__PARAM_H */ diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h index aec9767..1b7eff0 100644 --- a/include/asm-x86/paravirt.h +++ b/include/asm-x86/paravirt.h @@ -1,5 +1,5 @@ -#ifndef __ASM_PARAVIRT_H -#define __ASM_PARAVIRT_H +#ifndef ASM_X86__PARAVIRT_H +#define ASM_X86__PARAVIRT_H /* Various instructions on x86 need to be replaced for * para-virtualization: those hooks are defined here. */ @@ -1631,4 +1631,4 @@ static inline unsigned long __raw_local_irq_save(void) #endif /* __ASSEMBLY__ */ #endif /* CONFIG_PARAVIRT */ -#endif /* __ASM_PARAVIRT_H */ +#endif /* ASM_X86__PARAVIRT_H */ diff --git a/include/asm-x86/parport.h b/include/asm-x86/parport.h index 3c4ffeb..2e3dda4 100644 --- a/include/asm-x86/parport.h +++ b/include/asm-x86/parport.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_PARPORT_H -#define _ASM_X86_PARPORT_H +#ifndef ASM_X86__PARPORT_H +#define ASM_X86__PARPORT_H static int __devinit parport_pc_find_isa_ports(int autoirq, int autodma); static int __devinit parport_pc_find_nonpci_ports(int autoirq, int autodma) @@ -7,4 +7,4 @@ static int __devinit parport_pc_find_nonpci_ports(int autoirq, int autodma) return parport_pc_find_isa_ports(autoirq, autodma); } -#endif /* _ASM_X86_PARPORT_H */ +#endif /* ASM_X86__PARPORT_H */ diff --git a/include/asm-x86/pat.h b/include/asm-x86/pat.h index 7edc473..482c3e3 100644 --- a/include/asm-x86/pat.h +++ b/include/asm-x86/pat.h @@ -1,5 +1,5 @@ -#ifndef _ASM_PAT_H -#define _ASM_PAT_H +#ifndef ASM_X86__PAT_H +#define ASM_X86__PAT_H #include @@ -19,4 +19,4 @@ extern int free_memtype(u64 start, u64 end); extern void pat_disable(char *reason); -#endif +#endif /* ASM_X86__PAT_H */ diff --git a/include/asm-x86/pci-direct.h b/include/asm-x86/pci-direct.h index 80c775d..da42be0 100644 --- a/include/asm-x86/pci-direct.h +++ b/include/asm-x86/pci-direct.h @@ -1,5 +1,5 @@ -#ifndef ASM_PCI_DIRECT_H -#define ASM_PCI_DIRECT_H 1 +#ifndef ASM_X86__PCI_DIRECT_H +#define ASM_X86__PCI_DIRECT_H #include @@ -18,4 +18,4 @@ extern int early_pci_allowed(void); extern unsigned int pci_early_dump_regs; extern void early_dump_pci_device(u8 bus, u8 slot, u8 func); extern void early_dump_pci_devices(void); -#endif +#endif /* ASM_X86__PCI_DIRECT_H */ diff --git a/include/asm-x86/pci.h b/include/asm-x86/pci.h index 2db14cf..6025831 100644 --- a/include/asm-x86/pci.h +++ b/include/asm-x86/pci.h @@ -1,5 +1,5 @@ -#ifndef __x86_PCI_H -#define __x86_PCI_H +#ifndef ASM_X86__PCI_H +#define ASM_X86__PCI_H #include /* for struct page */ #include @@ -111,4 +111,4 @@ static inline cpumask_t __pcibus_to_cpumask(struct pci_bus *bus) } #endif -#endif +#endif /* ASM_X86__PCI_H */ diff --git a/include/asm-x86/pci_32.h b/include/asm-x86/pci_32.h index a50d468..3f22882 100644 --- a/include/asm-x86/pci_32.h +++ b/include/asm-x86/pci_32.h @@ -1,5 +1,5 @@ -#ifndef __i386_PCI_H -#define __i386_PCI_H +#ifndef ASM_X86__PCI_32_H +#define ASM_X86__PCI_32_H #ifdef __KERNEL__ @@ -31,4 +31,4 @@ struct pci_dev; #endif /* __KERNEL__ */ -#endif /* __i386_PCI_H */ +#endif /* ASM_X86__PCI_32_H */ diff --git a/include/asm-x86/pci_64.h b/include/asm-x86/pci_64.h index f330234..f72e12d 100644 --- a/include/asm-x86/pci_64.h +++ b/include/asm-x86/pci_64.h @@ -1,5 +1,5 @@ -#ifndef __x8664_PCI_H -#define __x8664_PCI_H +#ifndef ASM_X86__PCI_64_H +#define ASM_X86__PCI_64_H #ifdef __KERNEL__ @@ -63,4 +63,4 @@ extern void pci_iommu_alloc(void); #endif /* __KERNEL__ */ -#endif /* __x8664_PCI_H */ +#endif /* ASM_X86__PCI_64_H */ diff --git a/include/asm-x86/pda.h b/include/asm-x86/pda.h index b34e9a7..80860af 100644 --- a/include/asm-x86/pda.h +++ b/include/asm-x86/pda.h @@ -1,5 +1,5 @@ -#ifndef X86_64_PDA_H -#define X86_64_PDA_H +#ifndef ASM_X86__PDA_H +#define ASM_X86__PDA_H #ifndef __ASSEMBLY__ #include @@ -134,4 +134,4 @@ do { \ #define PDA_STACKOFFSET (5*8) -#endif +#endif /* ASM_X86__PDA_H */ diff --git a/include/asm-x86/percpu.h b/include/asm-x86/percpu.h index 4e91ee1..0afc832 100644 --- a/include/asm-x86/percpu.h +++ b/include/asm-x86/percpu.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_PERCPU_H_ -#define _ASM_X86_PERCPU_H_ +#ifndef ASM_X86__PERCPU_H +#define ASM_X86__PERCPU_H #ifdef CONFIG_X86_64 #include @@ -215,4 +215,4 @@ do { \ #endif /* !CONFIG_SMP */ -#endif /* _ASM_X86_PERCPU_H_ */ +#endif /* ASM_X86__PERCPU_H */ diff --git a/include/asm-x86/pgalloc.h b/include/asm-x86/pgalloc.h index d63ea43..3cd23ad 100644 --- a/include/asm-x86/pgalloc.h +++ b/include/asm-x86/pgalloc.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_PGALLOC_H -#define _ASM_X86_PGALLOC_H +#ifndef ASM_X86__PGALLOC_H +#define ASM_X86__PGALLOC_H #include #include /* for struct page */ @@ -111,4 +111,4 @@ extern void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud); #endif /* PAGETABLE_LEVELS > 3 */ #endif /* PAGETABLE_LEVELS > 2 */ -#endif /* _ASM_X86_PGALLOC_H */ +#endif /* ASM_X86__PGALLOC_H */ diff --git a/include/asm-x86/pgtable-2level-defs.h b/include/asm-x86/pgtable-2level-defs.h index 0f71c9f..7ec48f4 100644 --- a/include/asm-x86/pgtable-2level-defs.h +++ b/include/asm-x86/pgtable-2level-defs.h @@ -1,5 +1,5 @@ -#ifndef _I386_PGTABLE_2LEVEL_DEFS_H -#define _I386_PGTABLE_2LEVEL_DEFS_H +#ifndef ASM_X86__PGTABLE_2LEVEL_DEFS_H +#define ASM_X86__PGTABLE_2LEVEL_DEFS_H #define SHARED_KERNEL_PMD 0 @@ -17,4 +17,4 @@ #define PTRS_PER_PTE 1024 -#endif /* _I386_PGTABLE_2LEVEL_DEFS_H */ +#endif /* ASM_X86__PGTABLE_2LEVEL_DEFS_H */ diff --git a/include/asm-x86/pgtable-2level.h b/include/asm-x86/pgtable-2level.h index 46bc52c..60440b1 100644 --- a/include/asm-x86/pgtable-2level.h +++ b/include/asm-x86/pgtable-2level.h @@ -1,5 +1,5 @@ -#ifndef _I386_PGTABLE_2LEVEL_H -#define _I386_PGTABLE_2LEVEL_H +#ifndef ASM_X86__PGTABLE_2LEVEL_H +#define ASM_X86__PGTABLE_2LEVEL_H #define pte_ERROR(e) \ printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, (e).pte_low) @@ -78,4 +78,4 @@ static inline pte_t native_ptep_get_and_clear(pte_t *xp) #define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low }) #define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val }) -#endif /* _I386_PGTABLE_2LEVEL_H */ +#endif /* ASM_X86__PGTABLE_2LEVEL_H */ diff --git a/include/asm-x86/pgtable-3level-defs.h b/include/asm-x86/pgtable-3level-defs.h index 448ac95..c05fe6f 100644 --- a/include/asm-x86/pgtable-3level-defs.h +++ b/include/asm-x86/pgtable-3level-defs.h @@ -1,5 +1,5 @@ -#ifndef _I386_PGTABLE_3LEVEL_DEFS_H -#define _I386_PGTABLE_3LEVEL_DEFS_H +#ifndef ASM_X86__PGTABLE_3LEVEL_DEFS_H +#define ASM_X86__PGTABLE_3LEVEL_DEFS_H #ifdef CONFIG_PARAVIRT #define SHARED_KERNEL_PMD (pv_info.shared_kernel_pmd) @@ -25,4 +25,4 @@ */ #define PTRS_PER_PTE 512 -#endif /* _I386_PGTABLE_3LEVEL_DEFS_H */ +#endif /* ASM_X86__PGTABLE_3LEVEL_DEFS_H */ diff --git a/include/asm-x86/pgtable-3level.h b/include/asm-x86/pgtable-3level.h index c93dbb6..a9ad971 100644 --- a/include/asm-x86/pgtable-3level.h +++ b/include/asm-x86/pgtable-3level.h @@ -1,5 +1,5 @@ -#ifndef _I386_PGTABLE_3LEVEL_H -#define _I386_PGTABLE_3LEVEL_H +#ifndef ASM_X86__PGTABLE_3LEVEL_H +#define ASM_X86__PGTABLE_3LEVEL_H /* * Intel Physical Address Extension (PAE) Mode - three-level page @@ -179,4 +179,4 @@ static inline unsigned long pte_pfn(pte_t pte) #define __pte_to_swp_entry(pte) ((swp_entry_t){ (pte).pte_high }) #define __swp_entry_to_pte(x) ((pte_t){ { .pte_high = (x).val } }) -#endif /* _I386_PGTABLE_3LEVEL_H */ +#endif /* ASM_X86__PGTABLE_3LEVEL_H */ diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h index 96aa76e..b8d7c53 100644 --- a/include/asm-x86/pgtable.h +++ b/include/asm-x86/pgtable.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_PGTABLE_H -#define _ASM_X86_PGTABLE_H +#ifndef ASM_X86__PGTABLE_H +#define ASM_X86__PGTABLE_H #define FIRST_USER_ADDRESS 0 @@ -518,4 +518,4 @@ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) #include #endif /* __ASSEMBLY__ */ -#endif /* _ASM_X86_PGTABLE_H */ +#endif /* ASM_X86__PGTABLE_H */ diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h index 0611abf..4fa3b04 100644 --- a/include/asm-x86/pgtable_32.h +++ b/include/asm-x86/pgtable_32.h @@ -1,5 +1,5 @@ -#ifndef _I386_PGTABLE_H -#define _I386_PGTABLE_H +#ifndef ASM_X86__PGTABLE_32_H +#define ASM_X86__PGTABLE_32_H /* @@ -186,4 +186,4 @@ do { \ #define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ remap_pfn_range(vma, vaddr, pfn, size, prot) -#endif /* _I386_PGTABLE_H */ +#endif /* ASM_X86__PGTABLE_32_H */ diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h index 805d312..b80c0d7 100644 --- a/include/asm-x86/pgtable_64.h +++ b/include/asm-x86/pgtable_64.h @@ -1,5 +1,5 @@ -#ifndef _X86_64_PGTABLE_H -#define _X86_64_PGTABLE_H +#ifndef ASM_X86__PGTABLE_64_H +#define ASM_X86__PGTABLE_64_H #include #ifndef __ASSEMBLY__ @@ -284,4 +284,4 @@ extern void cleanup_highmap(void); #define __HAVE_ARCH_PTE_SAME #endif /* !__ASSEMBLY__ */ -#endif /* _X86_64_PGTABLE_H */ +#endif /* ASM_X86__PGTABLE_64_H */ diff --git a/include/asm-x86/posix_types_32.h b/include/asm-x86/posix_types_32.h index b031efd..70cf2bb 100644 --- a/include/asm-x86/posix_types_32.h +++ b/include/asm-x86/posix_types_32.h @@ -1,5 +1,5 @@ -#ifndef __ARCH_I386_POSIX_TYPES_H -#define __ARCH_I386_POSIX_TYPES_H +#ifndef ASM_X86__POSIX_TYPES_32_H +#define ASM_X86__POSIX_TYPES_32_H /* * This file is generally used by user-level software, so you need to @@ -82,4 +82,4 @@ do { \ #endif /* defined(__KERNEL__) */ -#endif +#endif /* ASM_X86__POSIX_TYPES_32_H */ diff --git a/include/asm-x86/posix_types_64.h b/include/asm-x86/posix_types_64.h index d6624c9..388b4e7 100644 --- a/include/asm-x86/posix_types_64.h +++ b/include/asm-x86/posix_types_64.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_64_POSIX_TYPES_H -#define _ASM_X86_64_POSIX_TYPES_H +#ifndef ASM_X86__POSIX_TYPES_64_H +#define ASM_X86__POSIX_TYPES_64_H /* * This file is generally used by user-level software, so you need to @@ -116,4 +116,4 @@ static inline void __FD_ZERO(__kernel_fd_set *p) #endif /* defined(__KERNEL__) */ -#endif +#endif /* ASM_X86__POSIX_TYPES_64_H */ diff --git a/include/asm-x86/prctl.h b/include/asm-x86/prctl.h index 52952ad..e7ae34e 100644 --- a/include/asm-x86/prctl.h +++ b/include/asm-x86/prctl.h @@ -1,5 +1,5 @@ -#ifndef X86_64_PRCTL_H -#define X86_64_PRCTL_H 1 +#ifndef ASM_X86__PRCTL_H +#define ASM_X86__PRCTL_H #define ARCH_SET_GS 0x1001 #define ARCH_SET_FS 0x1002 @@ -7,4 +7,4 @@ #define ARCH_GET_GS 0x1004 -#endif +#endif /* ASM_X86__PRCTL_H */ diff --git a/include/asm-x86/processor-flags.h b/include/asm-x86/processor-flags.h index 092b39b..ae1d434 100644 --- a/include/asm-x86/processor-flags.h +++ b/include/asm-x86/processor-flags.h @@ -1,5 +1,5 @@ -#ifndef __ASM_I386_PROCESSOR_FLAGS_H -#define __ASM_I386_PROCESSOR_FLAGS_H +#ifndef ASM_X86__PROCESSOR_FLAGS_H +#define ASM_X86__PROCESSOR_FLAGS_H /* Various flags defined: can be included from assembler. */ /* @@ -94,4 +94,4 @@ #define X86_VM_MASK 0 /* No VM86 support */ #endif -#endif /* __ASM_I386_PROCESSOR_FLAGS_H */ +#endif /* ASM_X86__PROCESSOR_FLAGS_H */ diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h index 15cb82a..a9a28ba 100644 --- a/include/asm-x86/processor.h +++ b/include/asm-x86/processor.h @@ -1,5 +1,5 @@ -#ifndef __ASM_X86_PROCESSOR_H -#define __ASM_X86_PROCESSOR_H +#ifndef ASM_X86__PROCESSOR_H +#define ASM_X86__PROCESSOR_H #include @@ -920,4 +920,4 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip, extern int get_tsc_mode(unsigned long adr); extern int set_tsc_mode(unsigned int val); -#endif +#endif /* ASM_X86__PROCESSOR_H */ diff --git a/include/asm-x86/proto.h b/include/asm-x86/proto.h index 3dd458c..6e89e8b 100644 --- a/include/asm-x86/proto.h +++ b/include/asm-x86/proto.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X8664_PROTO_H -#define _ASM_X8664_PROTO_H 1 +#ifndef ASM_X86__PROTO_H +#define ASM_X86__PROTO_H #include @@ -29,4 +29,4 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr); #define round_up(x, y) (((x) + (y) - 1) & ~((y) - 1)) #define round_down(x, y) ((x) & ~((y) - 1)) -#endif +#endif /* ASM_X86__PROTO_H */ diff --git a/include/asm-x86/ptrace-abi.h b/include/asm-x86/ptrace-abi.h index 72e7b9d..d0cf334 100644 --- a/include/asm-x86/ptrace-abi.h +++ b/include/asm-x86/ptrace-abi.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_PTRACE_ABI_H -#define _ASM_X86_PTRACE_ABI_H +#ifndef ASM_X86__PTRACE_ABI_H +#define ASM_X86__PTRACE_ABI_H #ifdef __i386__ @@ -140,4 +140,4 @@ struct ptrace_bts_config { Returns number of BTS records drained. */ -#endif +#endif /* ASM_X86__PTRACE_ABI_H */ diff --git a/include/asm-x86/ptrace.h b/include/asm-x86/ptrace.h index 8a71db8..d464f25 100644 --- a/include/asm-x86/ptrace.h +++ b/include/asm-x86/ptrace.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_PTRACE_H -#define _ASM_X86_PTRACE_H +#ifndef ASM_X86__PTRACE_H +#define ASM_X86__PTRACE_H #include /* For __user */ #include @@ -239,4 +239,4 @@ extern int do_set_thread_area(struct task_struct *p, int idx, #endif /* !__ASSEMBLY__ */ -#endif +#endif /* ASM_X86__PTRACE_H */ diff --git a/include/asm-x86/pvclock-abi.h b/include/asm-x86/pvclock-abi.h index 6857f84..edb3b4e 100644 --- a/include/asm-x86/pvclock-abi.h +++ b/include/asm-x86/pvclock-abi.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_PVCLOCK_ABI_H_ -#define _ASM_X86_PVCLOCK_ABI_H_ +#ifndef ASM_X86__PVCLOCK_ABI_H +#define ASM_X86__PVCLOCK_ABI_H #ifndef __ASSEMBLY__ /* @@ -39,4 +39,4 @@ struct pvclock_wall_clock { } __attribute__((__packed__)); #endif /* __ASSEMBLY__ */ -#endif /* _ASM_X86_PVCLOCK_ABI_H_ */ +#endif /* ASM_X86__PVCLOCK_ABI_H */ diff --git a/include/asm-x86/pvclock.h b/include/asm-x86/pvclock.h index 85b1bba..1a38f68 100644 --- a/include/asm-x86/pvclock.h +++ b/include/asm-x86/pvclock.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_PVCLOCK_H_ -#define _ASM_X86_PVCLOCK_H_ +#ifndef ASM_X86__PVCLOCK_H +#define ASM_X86__PVCLOCK_H #include #include @@ -10,4 +10,4 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall, struct pvclock_vcpu_time_info *vcpu, struct timespec *ts); -#endif /* _ASM_X86_PVCLOCK_H_ */ +#endif /* ASM_X86__PVCLOCK_H */ diff --git a/include/asm-x86/reboot.h b/include/asm-x86/reboot.h index 206f355..1c2f0ce 100644 --- a/include/asm-x86/reboot.h +++ b/include/asm-x86/reboot.h @@ -1,5 +1,5 @@ -#ifndef _ASM_REBOOT_H -#define _ASM_REBOOT_H +#ifndef ASM_X86__REBOOT_H +#define ASM_X86__REBOOT_H struct pt_regs; @@ -18,4 +18,4 @@ void native_machine_crash_shutdown(struct pt_regs *regs); void native_machine_shutdown(void); void machine_real_restart(const unsigned char *code, int length); -#endif /* _ASM_REBOOT_H */ +#endif /* ASM_X86__REBOOT_H */ diff --git a/include/asm-x86/reboot_fixups.h b/include/asm-x86/reboot_fixups.h index 0cb7d87..2c2987d 100644 --- a/include/asm-x86/reboot_fixups.h +++ b/include/asm-x86/reboot_fixups.h @@ -1,6 +1,6 @@ -#ifndef _LINUX_REBOOT_FIXUPS_H -#define _LINUX_REBOOT_FIXUPS_H +#ifndef ASM_X86__REBOOT_FIXUPS_H +#define ASM_X86__REBOOT_FIXUPS_H extern void mach_reboot_fixups(void); -#endif /* _LINUX_REBOOT_FIXUPS_H */ +#endif /* ASM_X86__REBOOT_FIXUPS_H */ diff --git a/include/asm-x86/required-features.h b/include/asm-x86/required-features.h index adec887..d6822e0 100644 --- a/include/asm-x86/required-features.h +++ b/include/asm-x86/required-features.h @@ -1,5 +1,5 @@ -#ifndef _ASM_REQUIRED_FEATURES_H -#define _ASM_REQUIRED_FEATURES_H 1 +#ifndef ASM_X86__REQUIRED_FEATURES_H +#define ASM_X86__REQUIRED_FEATURES_H /* Define minimum CPUID feature set for kernel These bits are checked really early to actually display a visible error message before the @@ -73,4 +73,4 @@ #define REQUIRED_MASK6 0 #define REQUIRED_MASK7 0 -#endif +#endif /* ASM_X86__REQUIRED_FEATURES_H */ diff --git a/include/asm-x86/resume-trace.h b/include/asm-x86/resume-trace.h index 8d9f0b4..519a8ec 100644 --- a/include/asm-x86/resume-trace.h +++ b/include/asm-x86/resume-trace.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_RESUME_TRACE_H -#define _ASM_X86_RESUME_TRACE_H +#ifndef ASM_X86__RESUME_TRACE_H +#define ASM_X86__RESUME_TRACE_H #include @@ -18,4 +18,4 @@ do { \ } \ } while (0) -#endif +#endif /* ASM_X86__RESUME_TRACE_H */ diff --git a/include/asm-x86/rio.h b/include/asm-x86/rio.h index c9448bd..5e1256b 100644 --- a/include/asm-x86/rio.h +++ b/include/asm-x86/rio.h @@ -5,8 +5,8 @@ * Author: Laurent Vivier */ -#ifndef __ASM_RIO_H -#define __ASM_RIO_H +#ifndef ASM_X86__RIO_H +#define ASM_X86__RIO_H #define RIO_TABLE_VERSION 3 @@ -60,4 +60,4 @@ enum { ALT_CALGARY = 5, /* Second Planar Calgary */ }; -#endif /* __ASM_RIO_H */ +#endif /* ASM_X86__RIO_H */ diff --git a/include/asm-x86/rwlock.h b/include/asm-x86/rwlock.h index 6a8c0d6..48a3109 100644 --- a/include/asm-x86/rwlock.h +++ b/include/asm-x86/rwlock.h @@ -1,8 +1,8 @@ -#ifndef _ASM_X86_RWLOCK_H -#define _ASM_X86_RWLOCK_H +#ifndef ASM_X86__RWLOCK_H +#define ASM_X86__RWLOCK_H #define RW_LOCK_BIAS 0x01000000 /* Actual code is in asm/spinlock.h or in arch/x86/lib/rwlock.S */ -#endif /* _ASM_X86_RWLOCK_H */ +#endif /* ASM_X86__RWLOCK_H */ diff --git a/include/asm-x86/rwsem.h b/include/asm-x86/rwsem.h index 750f2a3..3ff3015 100644 --- a/include/asm-x86/rwsem.h +++ b/include/asm-x86/rwsem.h @@ -29,8 +29,8 @@ * front, then they'll all be woken up, but no other readers will be. */ -#ifndef _I386_RWSEM_H -#define _I386_RWSEM_H +#ifndef ASM_X86__RWSEM_H +#define ASM_X86__RWSEM_H #ifndef _LINUX_RWSEM_H #error "please don't include asm/rwsem.h directly, use linux/rwsem.h instead" @@ -262,4 +262,4 @@ static inline int rwsem_is_locked(struct rw_semaphore *sem) } #endif /* __KERNEL__ */ -#endif /* _I386_RWSEM_H */ +#endif /* ASM_X86__RWSEM_H */ diff --git a/include/asm-x86/scatterlist.h b/include/asm-x86/scatterlist.h index c043206..ee48f88 100644 --- a/include/asm-x86/scatterlist.h +++ b/include/asm-x86/scatterlist.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_SCATTERLIST_H -#define _ASM_X86_SCATTERLIST_H +#ifndef ASM_X86__SCATTERLIST_H +#define ASM_X86__SCATTERLIST_H #include @@ -30,4 +30,4 @@ struct scatterlist { # define sg_dma_len(sg) ((sg)->dma_length) #endif -#endif +#endif /* ASM_X86__SCATTERLIST_H */ diff --git a/include/asm-x86/seccomp_32.h b/include/asm-x86/seccomp_32.h index 36e71c5..cf9ab2d 100644 --- a/include/asm-x86/seccomp_32.h +++ b/include/asm-x86/seccomp_32.h @@ -1,5 +1,5 @@ -#ifndef _ASM_SECCOMP_H -#define _ASM_SECCOMP_H +#ifndef ASM_X86__SECCOMP_32_H +#define ASM_X86__SECCOMP_32_H #include @@ -14,4 +14,4 @@ #define __NR_seccomp_exit __NR_exit #define __NR_seccomp_sigreturn __NR_sigreturn -#endif /* _ASM_SECCOMP_H */ +#endif /* ASM_X86__SECCOMP_32_H */ diff --git a/include/asm-x86/seccomp_64.h b/include/asm-x86/seccomp_64.h index 76cfe69..03274ce 100644 --- a/include/asm-x86/seccomp_64.h +++ b/include/asm-x86/seccomp_64.h @@ -1,5 +1,5 @@ -#ifndef _ASM_SECCOMP_H -#define _ASM_SECCOMP_H +#ifndef ASM_X86__SECCOMP_64_H +#define ASM_X86__SECCOMP_64_H #include @@ -22,4 +22,4 @@ #define __NR_seccomp_exit_32 __NR_ia32_exit #define __NR_seccomp_sigreturn_32 __NR_ia32_sigreturn -#endif /* _ASM_SECCOMP_H */ +#endif /* ASM_X86__SECCOMP_64_H */ diff --git a/include/asm-x86/segment.h b/include/asm-x86/segment.h index 646452e..ea5f0a8 100644 --- a/include/asm-x86/segment.h +++ b/include/asm-x86/segment.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_SEGMENT_H_ -#define _ASM_X86_SEGMENT_H_ +#ifndef ASM_X86__SEGMENT_H +#define ASM_X86__SEGMENT_H /* Constructor for a conventional segment GDT (or LDT) entry */ /* This is a macro so it can be used in initializers */ @@ -212,4 +212,4 @@ extern const char early_idt_handlers[NUM_EXCEPTION_VECTORS][10]; #endif #endif -#endif +#endif /* ASM_X86__SEGMENT_H */ diff --git a/include/asm-x86/sembuf.h b/include/asm-x86/sembuf.h index ee50c80..81f06b7 100644 --- a/include/asm-x86/sembuf.h +++ b/include/asm-x86/sembuf.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_SEMBUF_H -#define _ASM_X86_SEMBUF_H +#ifndef ASM_X86__SEMBUF_H +#define ASM_X86__SEMBUF_H /* * The semid64_ds structure for x86 architecture. @@ -21,4 +21,4 @@ struct semid64_ds { unsigned long __unused4; }; -#endif /* _ASM_X86_SEMBUF_H */ +#endif /* ASM_X86__SEMBUF_H */ diff --git a/include/asm-x86/serial.h b/include/asm-x86/serial.h index 628c801..303660b 100644 --- a/include/asm-x86/serial.h +++ b/include/asm-x86/serial.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_SERIAL_H -#define _ASM_X86_SERIAL_H +#ifndef ASM_X86__SERIAL_H +#define ASM_X86__SERIAL_H /* * This assumes you have a 1.8432 MHz clock for your UART. @@ -26,4 +26,4 @@ { 0, BASE_BAUD, 0x3E8, 4, STD_COM_FLAGS }, /* ttyS2 */ \ { 0, BASE_BAUD, 0x2E8, 3, STD_COM4_FLAGS }, /* ttyS3 */ -#endif /* _ASM_X86_SERIAL_H */ +#endif /* ASM_X86__SERIAL_H */ diff --git a/include/asm-x86/setup.h b/include/asm-x86/setup.h index a07c6f1..b7aeba9 100644 --- a/include/asm-x86/setup.h +++ b/include/asm-x86/setup.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_SETUP_H -#define _ASM_X86_SETUP_H +#ifndef ASM_X86__SETUP_H +#define ASM_X86__SETUP_H #define COMMAND_LINE_SIZE 2048 @@ -100,4 +100,4 @@ void __init x86_64_start_reservations(char *real_mode_data); #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ -#endif /* _ASM_X86_SETUP_H */ +#endif /* ASM_X86__SETUP_H */ diff --git a/include/asm-x86/shmbuf.h b/include/asm-x86/shmbuf.h index b51413b..f51aec2 100644 --- a/include/asm-x86/shmbuf.h +++ b/include/asm-x86/shmbuf.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_SHMBUF_H -#define _ASM_X86_SHMBUF_H +#ifndef ASM_X86__SHMBUF_H +#define ASM_X86__SHMBUF_H /* * The shmid64_ds structure for x86 architecture. @@ -48,4 +48,4 @@ struct shminfo64 { unsigned long __unused4; }; -#endif /* _ASM_X86_SHMBUF_H */ +#endif /* ASM_X86__SHMBUF_H */ diff --git a/include/asm-x86/shmparam.h b/include/asm-x86/shmparam.h index 0880cf0..a83a1fd 100644 --- a/include/asm-x86/shmparam.h +++ b/include/asm-x86/shmparam.h @@ -1,6 +1,6 @@ -#ifndef _ASM_X86_SHMPARAM_H -#define _ASM_X86_SHMPARAM_H +#ifndef ASM_X86__SHMPARAM_H +#define ASM_X86__SHMPARAM_H #define SHMLBA PAGE_SIZE /* attach addr a multiple of this */ -#endif /* _ASM_X86_SHMPARAM_H */ +#endif /* ASM_X86__SHMPARAM_H */ diff --git a/include/asm-x86/sigcontext.h b/include/asm-x86/sigcontext.h index 2f9c884..24879c8 100644 --- a/include/asm-x86/sigcontext.h +++ b/include/asm-x86/sigcontext.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_SIGCONTEXT_H -#define _ASM_X86_SIGCONTEXT_H +#ifndef ASM_X86__SIGCONTEXT_H +#define ASM_X86__SIGCONTEXT_H #include #include @@ -202,4 +202,4 @@ struct sigcontext { #endif /* !__i386__ */ -#endif +#endif /* ASM_X86__SIGCONTEXT_H */ diff --git a/include/asm-x86/sigcontext32.h b/include/asm-x86/sigcontext32.h index 57a9686..4e2ec73 100644 --- a/include/asm-x86/sigcontext32.h +++ b/include/asm-x86/sigcontext32.h @@ -1,5 +1,5 @@ -#ifndef _SIGCONTEXT32_H -#define _SIGCONTEXT32_H 1 +#ifndef ASM_X86__SIGCONTEXT32_H +#define ASM_X86__SIGCONTEXT32_H /* signal context for 32bit programs. */ @@ -68,4 +68,4 @@ struct sigcontext_ia32 { unsigned int cr2; }; -#endif +#endif /* ASM_X86__SIGCONTEXT32_H */ diff --git a/include/asm-x86/siginfo.h b/include/asm-x86/siginfo.h index a477bea..808bdfb 100644 --- a/include/asm-x86/siginfo.h +++ b/include/asm-x86/siginfo.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_SIGINFO_H -#define _ASM_X86_SIGINFO_H +#ifndef ASM_X86__SIGINFO_H +#define ASM_X86__SIGINFO_H #ifdef __x86_64__ # define __ARCH_SI_PREAMBLE_SIZE (4 * sizeof(int)) @@ -7,4 +7,4 @@ #include -#endif +#endif /* ASM_X86__SIGINFO_H */ diff --git a/include/asm-x86/signal.h b/include/asm-x86/signal.h index 6dac493..6ee3282 100644 --- a/include/asm-x86/signal.h +++ b/include/asm-x86/signal.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_SIGNAL_H -#define _ASM_X86_SIGNAL_H +#ifndef ASM_X86__SIGNAL_H +#define ASM_X86__SIGNAL_H #ifndef __ASSEMBLY__ #include @@ -256,4 +256,4 @@ struct pt_regs; #endif /* __KERNEL__ */ #endif /* __ASSEMBLY__ */ -#endif +#endif /* ASM_X86__SIGNAL_H */ diff --git a/include/asm-x86/smp.h b/include/asm-x86/smp.h index 3c877f7..cd3c5f0 100644 --- a/include/asm-x86/smp.h +++ b/include/asm-x86/smp.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_SMP_H_ -#define _ASM_X86_SMP_H_ +#ifndef ASM_X86__SMP_H +#define ASM_X86__SMP_H #ifndef __ASSEMBLY__ #include #include @@ -205,4 +205,4 @@ extern void cpu_uninit(void); #endif #endif /* __ASSEMBLY__ */ -#endif +#endif /* ASM_X86__SMP_H */ diff --git a/include/asm-x86/socket.h b/include/asm-x86/socket.h index 80af9c4..db73274 100644 --- a/include/asm-x86/socket.h +++ b/include/asm-x86/socket.h @@ -1,5 +1,5 @@ -#ifndef _ASM_SOCKET_H -#define _ASM_SOCKET_H +#ifndef ASM_X86__SOCKET_H +#define ASM_X86__SOCKET_H #include @@ -54,4 +54,4 @@ #define SO_MARK 36 -#endif /* _ASM_SOCKET_H */ +#endif /* ASM_X86__SOCKET_H */ diff --git a/include/asm-x86/sockios.h b/include/asm-x86/sockios.h index 49cc72b..a006704 100644 --- a/include/asm-x86/sockios.h +++ b/include/asm-x86/sockios.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_SOCKIOS_H -#define _ASM_X86_SOCKIOS_H +#ifndef ASM_X86__SOCKIOS_H +#define ASM_X86__SOCKIOS_H /* Socket-level I/O control calls. */ #define FIOSETOWN 0x8901 @@ -10,4 +10,4 @@ #define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */ #define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */ -#endif /* _ASM_X86_SOCKIOS_H */ +#endif /* ASM_X86__SOCKIOS_H */ diff --git a/include/asm-x86/sparsemem.h b/include/asm-x86/sparsemem.h index 9bd48b0..38f8e6b 100644 --- a/include/asm-x86/sparsemem.h +++ b/include/asm-x86/sparsemem.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_SPARSEMEM_H -#define _ASM_X86_SPARSEMEM_H +#ifndef ASM_X86__SPARSEMEM_H +#define ASM_X86__SPARSEMEM_H #ifdef CONFIG_SPARSEMEM /* @@ -31,4 +31,4 @@ #endif #endif /* CONFIG_SPARSEMEM */ -#endif +#endif /* ASM_X86__SPARSEMEM_H */ diff --git a/include/asm-x86/spinlock.h b/include/asm-x86/spinlock.h index 4f9a986..cbe0108 100644 --- a/include/asm-x86/spinlock.h +++ b/include/asm-x86/spinlock.h @@ -1,5 +1,5 @@ -#ifndef _X86_SPINLOCK_H_ -#define _X86_SPINLOCK_H_ +#ifndef ASM_X86__SPINLOCK_H +#define ASM_X86__SPINLOCK_H #include #include @@ -366,4 +366,4 @@ static inline void __raw_write_unlock(raw_rwlock_t *rw) #define _raw_read_relax(lock) cpu_relax() #define _raw_write_relax(lock) cpu_relax() -#endif +#endif /* ASM_X86__SPINLOCK_H */ diff --git a/include/asm-x86/spinlock_types.h b/include/asm-x86/spinlock_types.h index 06c071c..6aa9b56 100644 --- a/include/asm-x86/spinlock_types.h +++ b/include/asm-x86/spinlock_types.h @@ -1,5 +1,5 @@ -#ifndef __ASM_SPINLOCK_TYPES_H -#define __ASM_SPINLOCK_TYPES_H +#ifndef ASM_X86__SPINLOCK_TYPES_H +#define ASM_X86__SPINLOCK_TYPES_H #ifndef __LINUX_SPINLOCK_TYPES_H # error "please don't include this file directly" @@ -17,4 +17,4 @@ typedef struct { #define __RAW_RW_LOCK_UNLOCKED { RW_LOCK_BIAS } -#endif +#endif /* ASM_X86__SPINLOCK_TYPES_H */ diff --git a/include/asm-x86/srat.h b/include/asm-x86/srat.h index 774c919..5363e4f 100644 --- a/include/asm-x86/srat.h +++ b/include/asm-x86/srat.h @@ -24,8 +24,8 @@ * Send feedback to Pat Gaughen */ -#ifndef _ASM_SRAT_H_ -#define _ASM_SRAT_H_ +#ifndef ASM_X86__SRAT_H +#define ASM_X86__SRAT_H #ifdef CONFIG_ACPI_NUMA extern int get_memcfg_from_srat(void); @@ -36,4 +36,4 @@ static inline int get_memcfg_from_srat(void) } #endif -#endif /* _ASM_SRAT_H_ */ +#endif /* ASM_X86__SRAT_H */ diff --git a/include/asm-x86/stacktrace.h b/include/asm-x86/stacktrace.h index 30f8252..f43517e 100644 --- a/include/asm-x86/stacktrace.h +++ b/include/asm-x86/stacktrace.h @@ -1,5 +1,5 @@ -#ifndef _ASM_STACKTRACE_H -#define _ASM_STACKTRACE_H 1 +#ifndef ASM_X86__STACKTRACE_H +#define ASM_X86__STACKTRACE_H extern int kstack_depth_to_print; @@ -18,4 +18,4 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long *stack, unsigned long bp, const struct stacktrace_ops *ops, void *data); -#endif +#endif /* ASM_X86__STACKTRACE_H */ diff --git a/include/asm-x86/stat.h b/include/asm-x86/stat.h index 5c22dcb..1e120f6 100644 --- a/include/asm-x86/stat.h +++ b/include/asm-x86/stat.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_STAT_H -#define _ASM_X86_STAT_H +#ifndef ASM_X86__STAT_H +#define ASM_X86__STAT_H #define STAT_HAVE_NSEC 1 @@ -111,4 +111,4 @@ struct __old_kernel_stat { #endif }; -#endif +#endif /* ASM_X86__STAT_H */ diff --git a/include/asm-x86/statfs.h b/include/asm-x86/statfs.h index 7c651aa..3f005bc 100644 --- a/include/asm-x86/statfs.h +++ b/include/asm-x86/statfs.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_STATFS_H -#define _ASM_X86_STATFS_H +#ifndef ASM_X86__STATFS_H +#define ASM_X86__STATFS_H #ifdef __i386__ #include @@ -60,4 +60,4 @@ struct compat_statfs64 { } __attribute__((packed)); #endif /* !__i386__ */ -#endif +#endif /* ASM_X86__STATFS_H */ diff --git a/include/asm-x86/string_32.h b/include/asm-x86/string_32.h index 193578c..487843e 100644 --- a/include/asm-x86/string_32.h +++ b/include/asm-x86/string_32.h @@ -1,5 +1,5 @@ -#ifndef _I386_STRING_H_ -#define _I386_STRING_H_ +#ifndef ASM_X86__STRING_32_H +#define ASM_X86__STRING_32_H #ifdef __KERNEL__ @@ -323,4 +323,4 @@ extern void *memscan(void *addr, int c, size_t size); #endif /* __KERNEL__ */ -#endif +#endif /* ASM_X86__STRING_32_H */ diff --git a/include/asm-x86/string_64.h b/include/asm-x86/string_64.h index 52b5ab3..a2add11d 100644 --- a/include/asm-x86/string_64.h +++ b/include/asm-x86/string_64.h @@ -1,5 +1,5 @@ -#ifndef _X86_64_STRING_H_ -#define _X86_64_STRING_H_ +#ifndef ASM_X86__STRING_64_H +#define ASM_X86__STRING_64_H #ifdef __KERNEL__ @@ -57,4 +57,4 @@ int strcmp(const char *cs, const char *ct); #endif /* __KERNEL__ */ -#endif +#endif /* ASM_X86__STRING_64_H */ diff --git a/include/asm-x86/suspend_32.h b/include/asm-x86/suspend_32.h index 8675c67..acb6d4d 100644 --- a/include/asm-x86/suspend_32.h +++ b/include/asm-x86/suspend_32.h @@ -3,8 +3,8 @@ * Based on code * Copyright 2001 Patrick Mochel */ -#ifndef __ASM_X86_32_SUSPEND_H -#define __ASM_X86_32_SUSPEND_H +#ifndef ASM_X86__SUSPEND_32_H +#define ASM_X86__SUSPEND_32_H #include #include @@ -48,4 +48,4 @@ static inline void acpi_save_register_state(unsigned long return_point) extern int acpi_save_state_mem(void); #endif -#endif /* __ASM_X86_32_SUSPEND_H */ +#endif /* ASM_X86__SUSPEND_32_H */ diff --git a/include/asm-x86/suspend_64.h b/include/asm-x86/suspend_64.h index dc3262b..cf821dd 100644 --- a/include/asm-x86/suspend_64.h +++ b/include/asm-x86/suspend_64.h @@ -3,8 +3,8 @@ * Based on code * Copyright 2001 Patrick Mochel */ -#ifndef __ASM_X86_64_SUSPEND_H -#define __ASM_X86_64_SUSPEND_H +#ifndef ASM_X86__SUSPEND_64_H +#define ASM_X86__SUSPEND_64_H #include #include @@ -49,4 +49,4 @@ extern int acpi_save_state_mem(void); extern char core_restore_code; extern char restore_registers; -#endif /* __ASM_X86_64_SUSPEND_H */ +#endif /* ASM_X86__SUSPEND_64_H */ diff --git a/include/asm-x86/swiotlb.h b/include/asm-x86/swiotlb.h index c706a74..9486c40 100644 --- a/include/asm-x86/swiotlb.h +++ b/include/asm-x86/swiotlb.h @@ -1,5 +1,5 @@ -#ifndef _ASM_SWIOTLB_H -#define _ASM_SWIOTLB_H 1 +#ifndef ASM_X86__SWIOTLB_H +#define ASM_X86__SWIOTLB_H #include @@ -55,4 +55,4 @@ static inline void pci_swiotlb_init(void) static inline void dma_mark_clean(void *addr, size_t size) {} -#endif /* _ASM_SWIOTLB_H */ +#endif /* ASM_X86__SWIOTLB_H */ diff --git a/include/asm-x86/sync_bitops.h b/include/asm-x86/sync_bitops.h index b47a1d0..b689bee 100644 --- a/include/asm-x86/sync_bitops.h +++ b/include/asm-x86/sync_bitops.h @@ -1,5 +1,5 @@ -#ifndef _I386_SYNC_BITOPS_H -#define _I386_SYNC_BITOPS_H +#ifndef ASM_X86__SYNC_BITOPS_H +#define ASM_X86__SYNC_BITOPS_H /* * Copyright 1992, Linus Torvalds. @@ -127,4 +127,4 @@ static inline int sync_test_and_change_bit(int nr, volatile unsigned long *addr) #undef ADDR -#endif /* _I386_SYNC_BITOPS_H */ +#endif /* ASM_X86__SYNC_BITOPS_H */ diff --git a/include/asm-x86/system.h b/include/asm-x86/system.h index 983ce37..34505dd 100644 --- a/include/asm-x86/system.h +++ b/include/asm-x86/system.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_SYSTEM_H_ -#define _ASM_X86_SYSTEM_H_ +#ifndef ASM_X86__SYSTEM_H +#define ASM_X86__SYSTEM_H #include #include @@ -419,4 +419,4 @@ static inline void rdtsc_barrier(void) alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC); } -#endif +#endif /* ASM_X86__SYSTEM_H */ diff --git a/include/asm-x86/system_64.h b/include/asm-x86/system_64.h index 97fa251..5aedb8b 100644 --- a/include/asm-x86/system_64.h +++ b/include/asm-x86/system_64.h @@ -1,5 +1,5 @@ -#ifndef __ASM_SYSTEM_H -#define __ASM_SYSTEM_H +#ifndef ASM_X86__SYSTEM_64_H +#define ASM_X86__SYSTEM_64_H #include #include @@ -19,4 +19,4 @@ static inline void write_cr8(unsigned long val) #include -#endif +#endif /* ASM_X86__SYSTEM_64_H */ diff --git a/include/asm-x86/tce.h b/include/asm-x86/tce.h index b1a4ea0..e7932d7 100644 --- a/include/asm-x86/tce.h +++ b/include/asm-x86/tce.h @@ -21,8 +21,8 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#ifndef _ASM_X86_64_TCE_H -#define _ASM_X86_64_TCE_H +#ifndef ASM_X86__TCE_H +#define ASM_X86__TCE_H extern unsigned int specified_table_size; struct iommu_table; @@ -45,4 +45,4 @@ extern void * __init alloc_tce_table(void); extern void __init free_tce_table(void *tbl); extern int __init build_tce_table(struct pci_dev *dev, void __iomem *bbar); -#endif /* _ASM_X86_64_TCE_H */ +#endif /* ASM_X86__TCE_H */ diff --git a/include/asm-x86/termbits.h b/include/asm-x86/termbits.h index af1b70e..3d00dc5 100644 --- a/include/asm-x86/termbits.h +++ b/include/asm-x86/termbits.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_TERMBITS_H -#define _ASM_X86_TERMBITS_H +#ifndef ASM_X86__TERMBITS_H +#define ASM_X86__TERMBITS_H #include @@ -195,4 +195,4 @@ struct ktermios { #define TCSADRAIN 1 #define TCSAFLUSH 2 -#endif /* _ASM_X86_TERMBITS_H */ +#endif /* ASM_X86__TERMBITS_H */ diff --git a/include/asm-x86/termios.h b/include/asm-x86/termios.h index f729563..e235db2 100644 --- a/include/asm-x86/termios.h +++ b/include/asm-x86/termios.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_TERMIOS_H -#define _ASM_X86_TERMIOS_H +#ifndef ASM_X86__TERMIOS_H +#define ASM_X86__TERMIOS_H #include #include @@ -110,4 +110,4 @@ static inline int kernel_termios_to_user_termios_1(struct termios __user *u, #endif /* __KERNEL__ */ -#endif /* _ASM_X86_TERMIOS_H */ +#endif /* ASM_X86__TERMIOS_H */ diff --git a/include/asm-x86/therm_throt.h b/include/asm-x86/therm_throt.h index 399bf60..1c7f57b 100644 --- a/include/asm-x86/therm_throt.h +++ b/include/asm-x86/therm_throt.h @@ -1,9 +1,9 @@ -#ifndef __ASM_I386_THERM_THROT_H__ -#define __ASM_I386_THERM_THROT_H__ 1 +#ifndef ASM_X86__THERM_THROT_H +#define ASM_X86__THERM_THROT_H #include extern atomic_t therm_throt_en; int therm_throt_process(int curr); -#endif /* __ASM_I386_THERM_THROT_H__ */ +#endif /* ASM_X86__THERM_THROT_H */ diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h index 0a8f27d..50315a5 100644 --- a/include/asm-x86/thread_info.h +++ b/include/asm-x86/thread_info.h @@ -4,8 +4,8 @@ * - Incorporating suggestions made by Linus Torvalds and Dave Miller */ -#ifndef _ASM_X86_THREAD_INFO_H -#define _ASM_X86_THREAD_INFO_H +#ifndef ASM_X86__THREAD_INFO_H +#define ASM_X86__THREAD_INFO_H #include #include @@ -258,4 +258,4 @@ extern void free_thread_info(struct thread_info *ti); extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); #define arch_task_cache_init arch_task_cache_init #endif -#endif /* _ASM_X86_THREAD_INFO_H */ +#endif /* ASM_X86__THREAD_INFO_H */ diff --git a/include/asm-x86/time.h b/include/asm-x86/time.h index a17fa47..7457a5d 100644 --- a/include/asm-x86/time.h +++ b/include/asm-x86/time.h @@ -1,5 +1,5 @@ -#ifndef _ASMX86_TIME_H -#define _ASMX86_TIME_H +#ifndef ASM_X86__TIME_H +#define ASM_X86__TIME_H extern void hpet_time_init(void); @@ -58,4 +58,4 @@ static inline int native_set_wallclock(unsigned long nowtime) extern unsigned long __init calibrate_cpu(void); -#endif +#endif /* ASM_X86__TIME_H */ diff --git a/include/asm-x86/timer.h b/include/asm-x86/timer.h index fb2a4dd..b87123d 100644 --- a/include/asm-x86/timer.h +++ b/include/asm-x86/timer.h @@ -1,5 +1,5 @@ -#ifndef _ASMi386_TIMER_H -#define _ASMi386_TIMER_H +#ifndef ASM_X86__TIMER_H +#define ASM_X86__TIMER_H #include #include #include @@ -60,4 +60,4 @@ static inline unsigned long long cycles_2_ns(unsigned long long cyc) return ns; } -#endif +#endif /* ASM_X86__TIMER_H */ diff --git a/include/asm-x86/timex.h b/include/asm-x86/timex.h index 43e5a78..d1ce241 100644 --- a/include/asm-x86/timex.h +++ b/include/asm-x86/timex.h @@ -1,6 +1,6 @@ /* x86 architecture timex specifications */ -#ifndef _ASM_X86_TIMEX_H -#define _ASM_X86_TIMEX_H +#ifndef ASM_X86__TIMEX_H +#define ASM_X86__TIMEX_H #include #include @@ -16,4 +16,4 @@ #define ARCH_HAS_READ_CURRENT_TIMER -#endif +#endif /* ASM_X86__TIMEX_H */ diff --git a/include/asm-x86/tlb.h b/include/asm-x86/tlb.h index e4e9e2d..db36e9e 100644 --- a/include/asm-x86/tlb.h +++ b/include/asm-x86/tlb.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_TLB_H -#define _ASM_X86_TLB_H +#ifndef ASM_X86__TLB_H +#define ASM_X86__TLB_H #define tlb_start_vma(tlb, vma) do { } while (0) #define tlb_end_vma(tlb, vma) do { } while (0) @@ -8,4 +8,4 @@ #include -#endif +#endif /* ASM_X86__TLB_H */ diff --git a/include/asm-x86/tlbflush.h b/include/asm-x86/tlbflush.h index 35c76ce..ef68b76 100644 --- a/include/asm-x86/tlbflush.h +++ b/include/asm-x86/tlbflush.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_TLBFLUSH_H -#define _ASM_X86_TLBFLUSH_H +#ifndef ASM_X86__TLBFLUSH_H +#define ASM_X86__TLBFLUSH_H #include #include @@ -165,4 +165,4 @@ static inline void flush_tlb_kernel_range(unsigned long start, flush_tlb_all(); } -#endif /* _ASM_X86_TLBFLUSH_H */ +#endif /* ASM_X86__TLBFLUSH_H */ diff --git a/include/asm-x86/topology.h b/include/asm-x86/topology.h index 90ac771..7eca9bc 100644 --- a/include/asm-x86/topology.h +++ b/include/asm-x86/topology.h @@ -22,8 +22,8 @@ * * Send feedback to */ -#ifndef _ASM_X86_TOPOLOGY_H -#define _ASM_X86_TOPOLOGY_H +#ifndef ASM_X86__TOPOLOGY_H +#define ASM_X86__TOPOLOGY_H #ifdef CONFIG_X86_32 # ifdef CONFIG_X86_HT @@ -255,4 +255,4 @@ static inline void set_mp_bus_to_node(int busnum, int node) } #endif -#endif /* _ASM_X86_TOPOLOGY_H */ +#endif /* ASM_X86__TOPOLOGY_H */ diff --git a/include/asm-x86/trampoline.h b/include/asm-x86/trampoline.h index b156b08..0406bbd 100644 --- a/include/asm-x86/trampoline.h +++ b/include/asm-x86/trampoline.h @@ -1,5 +1,5 @@ -#ifndef __TRAMPOLINE_HEADER -#define __TRAMPOLINE_HEADER +#ifndef ASM_X86__TRAMPOLINE_H +#define ASM_X86__TRAMPOLINE_H #ifndef __ASSEMBLY__ @@ -18,4 +18,4 @@ extern unsigned long setup_trampoline(void); #endif /* __ASSEMBLY__ */ -#endif /* __TRAMPOLINE_HEADER */ +#endif /* ASM_X86__TRAMPOLINE_H */ diff --git a/include/asm-x86/traps.h b/include/asm-x86/traps.h index a4b65a7..2a891a7 100644 --- a/include/asm-x86/traps.h +++ b/include/asm-x86/traps.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_TRAPS_H -#define _ASM_X86_TRAPS_H +#ifndef ASM_X86__TRAPS_H +#define ASM_X86__TRAPS_H /* Common in X86_32 and X86_64 */ asmlinkage void divide_error(void); @@ -63,4 +63,4 @@ asmlinkage void do_simd_coprocessor_error(struct pt_regs *); asmlinkage void do_spurious_interrupt_bug(struct pt_regs *); #endif /* CONFIG_X86_32 */ -#endif /* _ASM_X86_TRAPS_H */ +#endif /* ASM_X86__TRAPS_H */ diff --git a/include/asm-x86/tsc.h b/include/asm-x86/tsc.h index cb6f6ee..ad0f5c4 100644 --- a/include/asm-x86/tsc.h +++ b/include/asm-x86/tsc.h @@ -1,8 +1,8 @@ /* * x86 TSC related functions */ -#ifndef _ASM_X86_TSC_H -#define _ASM_X86_TSC_H +#ifndef ASM_X86__TSC_H +#define ASM_X86__TSC_H #include @@ -59,4 +59,4 @@ extern void check_tsc_sync_target(void); extern int notsc_setup(char *); -#endif +#endif /* ASM_X86__TSC_H */ diff --git a/include/asm-x86/types.h b/include/asm-x86/types.h index 1ac80cd..e78b52e 100644 --- a/include/asm-x86/types.h +++ b/include/asm-x86/types.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_TYPES_H -#define _ASM_X86_TYPES_H +#ifndef ASM_X86__TYPES_H +#define ASM_X86__TYPES_H #include @@ -33,4 +33,4 @@ typedef u32 dma_addr_t; #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ -#endif +#endif /* ASM_X86__TYPES_H */ diff --git a/include/asm-x86/uaccess.h b/include/asm-x86/uaccess.h index f6fa4d8..1838f39 100644 --- a/include/asm-x86/uaccess.h +++ b/include/asm-x86/uaccess.h @@ -1,5 +1,5 @@ -#ifndef _ASM_UACCES_H_ -#define _ASM_UACCES_H_ +#ifndef ASM_X86__UACCESS_H +#define ASM_X86__UACCESS_H /* * User space memory access functions */ @@ -450,4 +450,4 @@ extern struct movsl_mask { # include "uaccess_64.h" #endif -#endif +#endif /* ASM_X86__UACCESS_H */ diff --git a/include/asm-x86/uaccess_32.h b/include/asm-x86/uaccess_32.h index 6fdef39..6b5b57d 100644 --- a/include/asm-x86/uaccess_32.h +++ b/include/asm-x86/uaccess_32.h @@ -1,5 +1,5 @@ -#ifndef __i386_UACCESS_H -#define __i386_UACCESS_H +#ifndef ASM_X86__UACCESS_32_H +#define ASM_X86__UACCESS_32_H /* * User space memory access functions @@ -215,4 +215,4 @@ long strnlen_user(const char __user *str, long n); unsigned long __must_check clear_user(void __user *mem, unsigned long len); unsigned long __must_check __clear_user(void __user *mem, unsigned long len); -#endif /* __i386_UACCESS_H */ +#endif /* ASM_X86__UACCESS_32_H */ diff --git a/include/asm-x86/uaccess_64.h b/include/asm-x86/uaccess_64.h index 515d4dc..5cfd295 100644 --- a/include/asm-x86/uaccess_64.h +++ b/include/asm-x86/uaccess_64.h @@ -1,5 +1,5 @@ -#ifndef __X86_64_UACCESS_H -#define __X86_64_UACCESS_H +#ifndef ASM_X86__UACCESS_64_H +#define ASM_X86__UACCESS_64_H /* * User space memory access functions @@ -198,4 +198,4 @@ static inline int __copy_from_user_inatomic_nocache(void *dst, unsigned long copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest); -#endif /* __X86_64_UACCESS_H */ +#endif /* ASM_X86__UACCESS_64_H */ diff --git a/include/asm-x86/ucontext.h b/include/asm-x86/ucontext.h index 50a79f7..9948dd3 100644 --- a/include/asm-x86/ucontext.h +++ b/include/asm-x86/ucontext.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_UCONTEXT_H -#define _ASM_X86_UCONTEXT_H +#ifndef ASM_X86__UCONTEXT_H +#define ASM_X86__UCONTEXT_H struct ucontext { unsigned long uc_flags; @@ -9,4 +9,4 @@ struct ucontext { sigset_t uc_sigmask; /* mask last for extensibility */ }; -#endif /* _ASM_X86_UCONTEXT_H */ +#endif /* ASM_X86__UCONTEXT_H */ diff --git a/include/asm-x86/unaligned.h b/include/asm-x86/unaligned.h index a7bd416..59dcdec 100644 --- a/include/asm-x86/unaligned.h +++ b/include/asm-x86/unaligned.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_UNALIGNED_H -#define _ASM_X86_UNALIGNED_H +#ifndef ASM_X86__UNALIGNED_H +#define ASM_X86__UNALIGNED_H /* * The x86 can do unaligned accesses itself. @@ -11,4 +11,4 @@ #define get_unaligned __get_unaligned_le #define put_unaligned __put_unaligned_le -#endif /* _ASM_X86_UNALIGNED_H */ +#endif /* ASM_X86__UNALIGNED_H */ diff --git a/include/asm-x86/unistd_32.h b/include/asm-x86/unistd_32.h index 8317d94..602ddd8 100644 --- a/include/asm-x86/unistd_32.h +++ b/include/asm-x86/unistd_32.h @@ -1,5 +1,5 @@ -#ifndef _ASM_I386_UNISTD_H_ -#define _ASM_I386_UNISTD_H_ +#ifndef ASM_X86__UNISTD_32_H +#define ASM_X86__UNISTD_32_H /* * This file contains the system call numbers. @@ -370,4 +370,4 @@ #endif #endif /* __KERNEL__ */ -#endif /* _ASM_I386_UNISTD_H_ */ +#endif /* ASM_X86__UNISTD_32_H */ diff --git a/include/asm-x86/unwind.h b/include/asm-x86/unwind.h index 8b064bd..a215156 100644 --- a/include/asm-x86/unwind.h +++ b/include/asm-x86/unwind.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_UNWIND_H -#define _ASM_X86_UNWIND_H +#ifndef ASM_X86__UNWIND_H +#define ASM_X86__UNWIND_H #define UNW_PC(frame) ((void)(frame), 0UL) #define UNW_SP(frame) ((void)(frame), 0UL) @@ -10,4 +10,4 @@ static inline int arch_unw_user_mode(const void *info) return 0; } -#endif /* _ASM_X86_UNWIND_H */ +#endif /* ASM_X86__UNWIND_H */ diff --git a/include/asm-x86/user32.h b/include/asm-x86/user32.h index a3d9100..aa66c18 100644 --- a/include/asm-x86/user32.h +++ b/include/asm-x86/user32.h @@ -1,5 +1,5 @@ -#ifndef USER32_H -#define USER32_H 1 +#ifndef ASM_X86__USER32_H +#define ASM_X86__USER32_H /* IA32 compatible user structures for ptrace. * These should be used for 32bit coredumps too. */ @@ -67,4 +67,4 @@ struct user32 { }; -#endif +#endif /* ASM_X86__USER32_H */ diff --git a/include/asm-x86/user_32.h b/include/asm-x86/user_32.h index d6e51ed..e0fe2f5 100644 --- a/include/asm-x86/user_32.h +++ b/include/asm-x86/user_32.h @@ -1,5 +1,5 @@ -#ifndef _I386_USER_H -#define _I386_USER_H +#ifndef ASM_X86__USER_32_H +#define ASM_X86__USER_32_H #include /* Core file format: The core file is written in such a way that gdb @@ -128,4 +128,4 @@ struct user{ #define HOST_TEXT_START_ADDR (u.start_code) #define HOST_STACK_END_ADDR (u.start_stack + u.u_ssize * NBPG) -#endif /* _I386_USER_H */ +#endif /* ASM_X86__USER_32_H */ diff --git a/include/asm-x86/user_64.h b/include/asm-x86/user_64.h index 6037b63..38b5799 100644 --- a/include/asm-x86/user_64.h +++ b/include/asm-x86/user_64.h @@ -1,5 +1,5 @@ -#ifndef _X86_64_USER_H -#define _X86_64_USER_H +#ifndef ASM_X86__USER_64_H +#define ASM_X86__USER_64_H #include #include @@ -134,4 +134,4 @@ struct user { #define HOST_TEXT_START_ADDR (u.start_code) #define HOST_STACK_END_ADDR (u.start_stack + u.u_ssize * NBPG) -#endif /* _X86_64_USER_H */ +#endif /* ASM_X86__USER_64_H */ diff --git a/include/asm-x86/uv/bios.h b/include/asm-x86/uv/bios.h index aa73362..7cd6d7e 100644 --- a/include/asm-x86/uv/bios.h +++ b/include/asm-x86/uv/bios.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_BIOS_H -#define _ASM_X86_BIOS_H +#ifndef ASM_X86__UV__BIOS_H +#define ASM_X86__UV__BIOS_H /* * BIOS layer definitions. @@ -65,4 +65,4 @@ x86_bios_freq_base(unsigned long which, unsigned long *ticks_per_second, unsigned long *drift_info); extern const char *x86_bios_strerror(long status); -#endif /* _ASM_X86_BIOS_H */ +#endif /* ASM_X86__UV__BIOS_H */ diff --git a/include/asm-x86/uv/uv_bau.h b/include/asm-x86/uv/uv_bau.h index 91ac0df..0950239 100644 --- a/include/asm-x86/uv/uv_bau.h +++ b/include/asm-x86/uv/uv_bau.h @@ -8,8 +8,8 @@ * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved. */ -#ifndef __ASM_X86_UV_BAU__ -#define __ASM_X86_UV_BAU__ +#ifndef ASM_X86__UV__UV_BAU_H +#define ASM_X86__UV__UV_BAU_H #include #define BITSPERBYTE 8 @@ -334,4 +334,4 @@ extern int uv_flush_tlb_others(cpumask_t *, struct mm_struct *, unsigned long); extern void uv_bau_message_intr1(void); extern void uv_bau_timeout_intr1(void); -#endif /* __ASM_X86_UV_BAU__ */ +#endif /* ASM_X86__UV__UV_BAU_H */ diff --git a/include/asm-x86/uv/uv_hub.h b/include/asm-x86/uv/uv_hub.h index a4ef26e..bdb5b01 100644 --- a/include/asm-x86/uv/uv_hub.h +++ b/include/asm-x86/uv/uv_hub.h @@ -8,8 +8,8 @@ * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved. */ -#ifndef __ASM_X86_UV_HUB_H__ -#define __ASM_X86_UV_HUB_H__ +#ifndef ASM_X86__UV__UV_HUB_H +#define ASM_X86__UV__UV_HUB_H #include #include @@ -350,5 +350,5 @@ static inline int uv_num_possible_blades(void) return uv_possible_blades; } -#endif /* __ASM_X86_UV_HUB__ */ +#endif /* ASM_X86__UV__UV_HUB_H */ diff --git a/include/asm-x86/uv/uv_mmrs.h b/include/asm-x86/uv/uv_mmrs.h index 151fd7f..8b03d89 100644 --- a/include/asm-x86/uv/uv_mmrs.h +++ b/include/asm-x86/uv/uv_mmrs.h @@ -8,8 +8,8 @@ * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved. */ -#ifndef __ASM_X86_UV_MMRS__ -#define __ASM_X86_UV_MMRS__ +#ifndef ASM_X86__UV__UV_MMRS_H +#define ASM_X86__UV__UV_MMRS_H #define UV_MMR_ENABLE (1UL << 63) @@ -1292,4 +1292,4 @@ union uvh_si_alias2_overlay_config_u { }; -#endif /* __ASM_X86_UV_MMRS__ */ +#endif /* ASM_X86__UV__UV_MMRS_H */ diff --git a/include/asm-x86/vdso.h b/include/asm-x86/vdso.h index 8e18fb8..4ab3209 100644 --- a/include/asm-x86/vdso.h +++ b/include/asm-x86/vdso.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_VDSO_H -#define _ASM_X86_VDSO_H 1 +#ifndef ASM_X86__VDSO_H +#define ASM_X86__VDSO_H #ifdef CONFIG_X86_64 extern const char VDSO64_PRELINK[]; @@ -44,4 +44,4 @@ extern const char vdso32_int80_start, vdso32_int80_end; extern const char vdso32_syscall_start, vdso32_syscall_end; extern const char vdso32_sysenter_start, vdso32_sysenter_end; -#endif /* asm-x86/vdso.h */ +#endif /* ASM_X86__VDSO_H */ diff --git a/include/asm-x86/vga.h b/include/asm-x86/vga.h index 0ccf804..b9e493d 100644 --- a/include/asm-x86/vga.h +++ b/include/asm-x86/vga.h @@ -4,8 +4,8 @@ * (c) 1998 Martin Mares */ -#ifndef _LINUX_ASM_VGA_H_ -#define _LINUX_ASM_VGA_H_ +#ifndef ASM_X86__VGA_H +#define ASM_X86__VGA_H /* * On the PC, we can just recalculate addresses and then @@ -17,4 +17,4 @@ #define vga_readb(x) (*(x)) #define vga_writeb(x, y) (*(y) = (x)) -#endif +#endif /* ASM_X86__VGA_H */ diff --git a/include/asm-x86/vgtod.h b/include/asm-x86/vgtod.h index 3301f09..38fd133 100644 --- a/include/asm-x86/vgtod.h +++ b/include/asm-x86/vgtod.h @@ -1,5 +1,5 @@ -#ifndef _ASM_VGTOD_H -#define _ASM_VGTOD_H 1 +#ifndef ASM_X86__VGTOD_H +#define ASM_X86__VGTOD_H #include #include @@ -26,4 +26,4 @@ extern struct vsyscall_gtod_data __vsyscall_gtod_data __section_vsyscall_gtod_data; extern struct vsyscall_gtod_data vsyscall_gtod_data; -#endif +#endif /* ASM_X86__VGTOD_H */ diff --git a/include/asm-x86/visws/cobalt.h b/include/asm-x86/visws/cobalt.h index 9952588..9627a8f 100644 --- a/include/asm-x86/visws/cobalt.h +++ b/include/asm-x86/visws/cobalt.h @@ -1,5 +1,5 @@ -#ifndef __I386_SGI_COBALT_H -#define __I386_SGI_COBALT_H +#ifndef ASM_X86__VISWS__COBALT_H +#define ASM_X86__VISWS__COBALT_H #include @@ -122,4 +122,4 @@ extern char visws_board_type; extern char visws_board_rev; -#endif /* __I386_SGI_COBALT_H */ +#endif /* ASM_X86__VISWS__COBALT_H */ diff --git a/include/asm-x86/visws/lithium.h b/include/asm-x86/visws/lithium.h index dfcd4f0..b36d3b3 100644 --- a/include/asm-x86/visws/lithium.h +++ b/include/asm-x86/visws/lithium.h @@ -1,5 +1,5 @@ -#ifndef __I386_SGI_LITHIUM_H -#define __I386_SGI_LITHIUM_H +#ifndef ASM_X86__VISWS__LITHIUM_H +#define ASM_X86__VISWS__LITHIUM_H #include @@ -49,5 +49,5 @@ static inline unsigned short li_pcib_read16(unsigned long reg) return *((volatile unsigned short *)(LI_PCIB_VADDR+reg)); } -#endif +#endif /* ASM_X86__VISWS__LITHIUM_H */ diff --git a/include/asm-x86/visws/piix4.h b/include/asm-x86/visws/piix4.h index 83ea4f4..61c9380 100644 --- a/include/asm-x86/visws/piix4.h +++ b/include/asm-x86/visws/piix4.h @@ -1,5 +1,5 @@ -#ifndef __I386_SGI_PIIX_H -#define __I386_SGI_PIIX_H +#ifndef ASM_X86__VISWS__PIIX4_H +#define ASM_X86__VISWS__PIIX4_H /* * PIIX4 as used on SGI Visual Workstations @@ -104,4 +104,4 @@ */ #define PIIX_GPI_STPCLK 0x4 // STPCLK signal routed back in -#endif +#endif /* ASM_X86__VISWS__PIIX4_H */ diff --git a/include/asm-x86/vm86.h b/include/asm-x86/vm86.h index 5ce3513..998bd18 100644 --- a/include/asm-x86/vm86.h +++ b/include/asm-x86/vm86.h @@ -1,5 +1,5 @@ -#ifndef _LINUX_VM86_H -#define _LINUX_VM86_H +#ifndef ASM_X86__VM86_H +#define ASM_X86__VM86_H /* * I'm guessing at the VIF/VIP flag usage, but hope that this is how @@ -205,4 +205,4 @@ static inline int handle_vm86_trap(struct kernel_vm86_regs *a, long b, int c) #endif /* __KERNEL__ */ -#endif +#endif /* ASM_X86__VM86_H */ diff --git a/include/asm-x86/vmi_time.h b/include/asm-x86/vmi_time.h index c3118c3..b2d39e6 100644 --- a/include/asm-x86/vmi_time.h +++ b/include/asm-x86/vmi_time.h @@ -22,8 +22,8 @@ * */ -#ifndef __VMI_TIME_H -#define __VMI_TIME_H +#ifndef ASM_X86__VMI_TIME_H +#define ASM_X86__VMI_TIME_H /* * Raw VMI call indices for timer functions @@ -95,4 +95,4 @@ extern void __devinit vmi_time_ap_init(void); #define CONFIG_VMI_ALARM_HZ 100 -#endif +#endif /* ASM_X86__VMI_TIME_H */ diff --git a/include/asm-x86/vsyscall.h b/include/asm-x86/vsyscall.h index 6b66ff9..dcd4682 100644 --- a/include/asm-x86/vsyscall.h +++ b/include/asm-x86/vsyscall.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_64_VSYSCALL_H_ -#define _ASM_X86_64_VSYSCALL_H_ +#ifndef ASM_X86__VSYSCALL_H +#define ASM_X86__VSYSCALL_H enum vsyscall_num { __NR_vgettimeofday, @@ -41,4 +41,4 @@ extern void map_vsyscall(void); #endif /* __KERNEL__ */ -#endif /* _ASM_X86_64_VSYSCALL_H_ */ +#endif /* ASM_X86__VSYSCALL_H */ diff --git a/include/asm-x86/xen/events.h b/include/asm-x86/xen/events.h index 8ded747..8151f5b 100644 --- a/include/asm-x86/xen/events.h +++ b/include/asm-x86/xen/events.h @@ -1,5 +1,5 @@ -#ifndef __XEN_EVENTS_H -#define __XEN_EVENTS_H +#ifndef ASM_X86__XEN__EVENTS_H +#define ASM_X86__XEN__EVENTS_H enum ipi_vector { XEN_RESCHEDULE_VECTOR, @@ -21,4 +21,4 @@ static inline void xen_do_IRQ(int irq, struct pt_regs *regs) do_IRQ(regs); } -#endif /* __XEN_EVENTS_H */ +#endif /* ASM_X86__XEN__EVENTS_H */ diff --git a/include/asm-x86/xen/grant_table.h b/include/asm-x86/xen/grant_table.h index 2444d45..c4baab4 100644 --- a/include/asm-x86/xen/grant_table.h +++ b/include/asm-x86/xen/grant_table.h @@ -1,7 +1,7 @@ -#ifndef __XEN_GRANT_TABLE_H -#define __XEN_GRANT_TABLE_H +#ifndef ASM_X86__XEN__GRANT_TABLE_H +#define ASM_X86__XEN__GRANT_TABLE_H #define xen_alloc_vm_area(size) alloc_vm_area(size) #define xen_free_vm_area(area) free_vm_area(area) -#endif /* __XEN_GRANT_TABLE_H */ +#endif /* ASM_X86__XEN__GRANT_TABLE_H */ diff --git a/include/asm-x86/xen/hypercall.h b/include/asm-x86/xen/hypercall.h index 91cb7fd..44f4259 100644 --- a/include/asm-x86/xen/hypercall.h +++ b/include/asm-x86/xen/hypercall.h @@ -30,8 +30,8 @@ * IN THE SOFTWARE. */ -#ifndef __HYPERCALL_H__ -#define __HYPERCALL_H__ +#ifndef ASM_X86__XEN__HYPERCALL_H +#define ASM_X86__XEN__HYPERCALL_H #include #include @@ -524,4 +524,4 @@ MULTI_stack_switch(struct multicall_entry *mcl, mcl->args[1] = esp; } -#endif /* __HYPERCALL_H__ */ +#endif /* ASM_X86__XEN__HYPERCALL_H */ diff --git a/include/asm-x86/xen/hypervisor.h b/include/asm-x86/xen/hypervisor.h index 8e15dd2..06c3504 100644 --- a/include/asm-x86/xen/hypervisor.h +++ b/include/asm-x86/xen/hypervisor.h @@ -30,8 +30,8 @@ * IN THE SOFTWARE. */ -#ifndef __HYPERVISOR_H__ -#define __HYPERVISOR_H__ +#ifndef ASM_X86__XEN__HYPERVISOR_H +#define ASM_X86__XEN__HYPERVISOR_H #include #include @@ -70,4 +70,4 @@ u64 jiffies_to_st(unsigned long jiffies); #define is_running_on_xen() (xen_start_info ? 1 : 0) -#endif /* __HYPERVISOR_H__ */ +#endif /* ASM_X86__XEN__HYPERVISOR_H */ diff --git a/include/asm-x86/xen/interface.h b/include/asm-x86/xen/interface.h index 9d810f2..d077bba 100644 --- a/include/asm-x86/xen/interface.h +++ b/include/asm-x86/xen/interface.h @@ -6,8 +6,8 @@ * Copyright (c) 2004, K A Fraser */ -#ifndef __ASM_X86_XEN_INTERFACE_H -#define __ASM_X86_XEN_INTERFACE_H +#ifndef ASM_X86__XEN__INTERFACE_H +#define ASM_X86__XEN__INTERFACE_H #ifdef __XEN__ #define __DEFINE_GUEST_HANDLE(name, type) \ @@ -172,4 +172,4 @@ DEFINE_GUEST_HANDLE_STRUCT(vcpu_guest_context); #define XEN_CPUID XEN_EMULATE_PREFIX "cpuid" #endif -#endif /* __ASM_X86_XEN_INTERFACE_H */ +#endif /* ASM_X86__XEN__INTERFACE_H */ diff --git a/include/asm-x86/xen/interface_32.h b/include/asm-x86/xen/interface_32.h index d8ac41d..08167e1 100644 --- a/include/asm-x86/xen/interface_32.h +++ b/include/asm-x86/xen/interface_32.h @@ -6,8 +6,8 @@ * Copyright (c) 2004, K A Fraser */ -#ifndef __ASM_X86_XEN_INTERFACE_32_H -#define __ASM_X86_XEN_INTERFACE_32_H +#ifndef ASM_X86__XEN__INTERFACE_32_H +#define ASM_X86__XEN__INTERFACE_32_H /* @@ -94,4 +94,4 @@ typedef struct xen_callback xen_callback_t; #define xen_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20)) #define xen_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20)) -#endif /* __ASM_X86_XEN_INTERFACE_32_H */ +#endif /* ASM_X86__XEN__INTERFACE_32_H */ diff --git a/include/asm-x86/xen/interface_64.h b/include/asm-x86/xen/interface_64.h index 842266c..046c0f1 100644 --- a/include/asm-x86/xen/interface_64.h +++ b/include/asm-x86/xen/interface_64.h @@ -1,5 +1,5 @@ -#ifndef __ASM_X86_XEN_INTERFACE_64_H -#define __ASM_X86_XEN_INTERFACE_64_H +#ifndef ASM_X86__XEN__INTERFACE_64_H +#define ASM_X86__XEN__INTERFACE_64_H /* * 64-bit segment selectors @@ -156,4 +156,4 @@ typedef unsigned long xen_callback_t; #endif /* !__ASSEMBLY__ */ -#endif /* __ASM_X86_XEN_INTERFACE_64_H */ +#endif /* ASM_X86__XEN__INTERFACE_64_H */ diff --git a/include/asm-x86/xen/page.h b/include/asm-x86/xen/page.h index 05e678a..a17a864 100644 --- a/include/asm-x86/xen/page.h +++ b/include/asm-x86/xen/page.h @@ -1,5 +1,5 @@ -#ifndef __XEN_PAGE_H -#define __XEN_PAGE_H +#ifndef ASM_X86__XEN__PAGE_H +#define ASM_X86__XEN__PAGE_H #include @@ -162,4 +162,4 @@ xmaddr_t arbitrary_virt_to_machine(void *address); void make_lowmem_page_readonly(void *vaddr); void make_lowmem_page_readwrite(void *vaddr); -#endif /* __XEN_PAGE_H */ +#endif /* ASM_X86__XEN__PAGE_H */ -- cgit v0.10.2 From 5616c23ad9cd3c50af674d408fef7b90abeee81c Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 22 Jul 2008 15:32:38 -0400 Subject: x86: doc: move x86-generic documentation from Doc/x86/i386 The boot protocol, USB legacy support, and zero-page documentation is common to the x86 platform, not i386-specific. Signed-off-by: H. Peter Anvin diff --git a/Documentation/x86/boot.txt b/Documentation/x86/boot.txt new file mode 100644 index 0000000..147bfe5 --- /dev/null +++ b/Documentation/x86/boot.txt @@ -0,0 +1,900 @@ + THE LINUX/x86 BOOT PROTOCOL + --------------------------- + +On the x86 platform, the Linux kernel uses a rather complicated boot +convention. This has evolved partially due to historical aspects, as +well as the desire in the early days to have the kernel itself be a +bootable image, the complicated PC memory model and due to changed +expectations in the PC industry caused by the effective demise of +real-mode DOS as a mainstream operating system. + +Currently, the following versions of the Linux/x86 boot protocol exist. + +Old kernels: zImage/Image support only. Some very early kernels + may not even support a command line. + +Protocol 2.00: (Kernel 1.3.73) Added bzImage and initrd support, as + well as a formalized way to communicate between the + boot loader and the kernel. setup.S made relocatable, + although the traditional setup area still assumed + writable. + +Protocol 2.01: (Kernel 1.3.76) Added a heap overrun warning. + +Protocol 2.02: (Kernel 2.4.0-test3-pre3) New command line protocol. + Lower the conventional memory ceiling. No overwrite + of the traditional setup area, thus making booting + safe for systems which use the EBDA from SMM or 32-bit + BIOS entry points. zImage deprecated but still + supported. + +Protocol 2.03: (Kernel 2.4.18-pre1) Explicitly makes the highest possible + initrd address available to the bootloader. + +Protocol 2.04: (Kernel 2.6.14) Extend the syssize field to four bytes. + +Protocol 2.05: (Kernel 2.6.20) Make protected mode kernel relocatable. + Introduce relocatable_kernel and kernel_alignment fields. + +Protocol 2.06: (Kernel 2.6.22) Added a field that contains the size of + the boot command line. + +Protocol 2.07: (Kernel 2.6.24) Added paravirtualised boot protocol. + Introduced hardware_subarch and hardware_subarch_data + and KEEP_SEGMENTS flag in load_flags. + +Protocol 2.08: (Kernel 2.6.26) Added crc32 checksum and ELF format + payload. Introduced payload_offset and payload length + fields to aid in locating the payload. + +Protocol 2.09: (Kernel 2.6.26) Added a field of 64-bit physical + pointer to single linked list of struct setup_data. + +**** MEMORY LAYOUT + +The traditional memory map for the kernel loader, used for Image or +zImage kernels, typically looks like: + + | | +0A0000 +------------------------+ + | Reserved for BIOS | Do not use. Reserved for BIOS EBDA. +09A000 +------------------------+ + | Command line | + | Stack/heap | For use by the kernel real-mode code. +098000 +------------------------+ + | Kernel setup | The kernel real-mode code. +090200 +------------------------+ + | Kernel boot sector | The kernel legacy boot sector. +090000 +------------------------+ + | Protected-mode kernel | The bulk of the kernel image. +010000 +------------------------+ + | Boot loader | <- Boot sector entry point 0000:7C00 +001000 +------------------------+ + | Reserved for MBR/BIOS | +000800 +------------------------+ + | Typically used by MBR | +000600 +------------------------+ + | BIOS use only | +000000 +------------------------+ + + +When using bzImage, the protected-mode kernel was relocated to +0x100000 ("high memory"), and the kernel real-mode block (boot sector, +setup, and stack/heap) was made relocatable to any address between +0x10000 and end of low memory. Unfortunately, in protocols 2.00 and +2.01 the 0x90000+ memory range is still used internally by the kernel; +the 2.02 protocol resolves that problem. + +It is desirable to keep the "memory ceiling" -- the highest point in +low memory touched by the boot loader -- as low as possible, since +some newer BIOSes have begun to allocate some rather large amounts of +memory, called the Extended BIOS Data Area, near the top of low +memory. The boot loader should use the "INT 12h" BIOS call to verify +how much low memory is available. + +Unfortunately, if INT 12h reports that the amount of memory is too +low, there is usually nothing the boot loader can do but to report an +error to the user. The boot loader should therefore be designed to +take up as little space in low memory as it reasonably can. For +zImage or old bzImage kernels, which need data written into the +0x90000 segment, the boot loader should make sure not to use memory +above the 0x9A000 point; too many BIOSes will break above that point. + +For a modern bzImage kernel with boot protocol version >= 2.02, a +memory layout like the following is suggested: + + ~ ~ + | Protected-mode kernel | +100000 +------------------------+ + | I/O memory hole | +0A0000 +------------------------+ + | Reserved for BIOS | Leave as much as possible unused + ~ ~ + | Command line | (Can also be below the X+10000 mark) +X+10000 +------------------------+ + | Stack/heap | For use by the kernel real-mode code. +X+08000 +------------------------+ + | Kernel setup | The kernel real-mode code. + | Kernel boot sector | The kernel legacy boot sector. +X +------------------------+ + | Boot loader | <- Boot sector entry point 0000:7C00 +001000 +------------------------+ + | Reserved for MBR/BIOS | +000800 +------------------------+ + | Typically used by MBR | +000600 +------------------------+ + | BIOS use only | +000000 +------------------------+ + +... where the address X is as low as the design of the boot loader +permits. + + +**** THE REAL-MODE KERNEL HEADER + +In the following text, and anywhere in the kernel boot sequence, "a +sector" refers to 512 bytes. It is independent of the actual sector +size of the underlying medium. + +The first step in loading a Linux kernel should be to load the +real-mode code (boot sector and setup code) and then examine the +following header at offset 0x01f1. The real-mode code can total up to +32K, although the boot loader may choose to load only the first two +sectors (1K) and then examine the bootup sector size. + +The header looks like: + +Offset Proto Name Meaning +/Size + +01F1/1 ALL(1 setup_sects The size of the setup in sectors +01F2/2 ALL root_flags If set, the root is mounted readonly +01F4/4 2.04+(2 syssize The size of the 32-bit code in 16-byte paras +01F8/2 ALL ram_size DO NOT USE - for bootsect.S use only +01FA/2 ALL vid_mode Video mode control +01FC/2 ALL root_dev Default root device number +01FE/2 ALL boot_flag 0xAA55 magic number +0200/2 2.00+ jump Jump instruction +0202/4 2.00+ header Magic signature "HdrS" +0206/2 2.00+ version Boot protocol version supported +0208/4 2.00+ realmode_swtch Boot loader hook (see below) +020C/2 2.00+ start_sys The load-low segment (0x1000) (obsolete) +020E/2 2.00+ kernel_version Pointer to kernel version string +0210/1 2.00+ type_of_loader Boot loader identifier +0211/1 2.00+ loadflags Boot protocol option flags +0212/2 2.00+ setup_move_size Move to high memory size (used with hooks) +0214/4 2.00+ code32_start Boot loader hook (see below) +0218/4 2.00+ ramdisk_image initrd load address (set by boot loader) +021C/4 2.00+ ramdisk_size initrd size (set by boot loader) +0220/4 2.00+ bootsect_kludge DO NOT USE - for bootsect.S use only +0224/2 2.01+ heap_end_ptr Free memory after setup end +0226/2 N/A pad1 Unused +0228/4 2.02+ cmd_line_ptr 32-bit pointer to the kernel command line +022C/4 2.03+ initrd_addr_max Highest legal initrd address +0230/4 2.05+ kernel_alignment Physical addr alignment required for kernel +0234/1 2.05+ relocatable_kernel Whether kernel is relocatable or not +0235/3 N/A pad2 Unused +0238/4 2.06+ cmdline_size Maximum size of the kernel command line +023C/4 2.07+ hardware_subarch Hardware subarchitecture +0240/8 2.07+ hardware_subarch_data Subarchitecture-specific data +0248/4 2.08+ payload_offset Offset of kernel payload +024C/4 2.08+ payload_length Length of kernel payload +0250/8 2.09+ setup_data 64-bit physical pointer to linked list + of struct setup_data + +(1) For backwards compatibility, if the setup_sects field contains 0, the + real value is 4. + +(2) For boot protocol prior to 2.04, the upper two bytes of the syssize + field are unusable, which means the size of a bzImage kernel + cannot be determined. + +If the "HdrS" (0x53726448) magic number is not found at offset 0x202, +the boot protocol version is "old". Loading an old kernel, the +following parameters should be assumed: + + Image type = zImage + initrd not supported + Real-mode kernel must be located at 0x90000. + +Otherwise, the "version" field contains the protocol version, +e.g. protocol version 2.01 will contain 0x0201 in this field. When +setting fields in the header, you must make sure only to set fields +supported by the protocol version in use. + + +**** DETAILS OF HEADER FIELDS + +For each field, some are information from the kernel to the bootloader +("read"), some are expected to be filled out by the bootloader +("write"), and some are expected to be read and modified by the +bootloader ("modify"). + +All general purpose boot loaders should write the fields marked +(obligatory). Boot loaders who want to load the kernel at a +nonstandard address should fill in the fields marked (reloc); other +boot loaders can ignore those fields. + +The byte order of all fields is littleendian (this is x86, after all.) + +Field name: setup_sects +Type: read +Offset/size: 0x1f1/1 +Protocol: ALL + + The size of the setup code in 512-byte sectors. If this field is + 0, the real value is 4. The real-mode code consists of the boot + sector (always one 512-byte sector) plus the setup code. + +Field name: root_flags +Type: modify (optional) +Offset/size: 0x1f2/2 +Protocol: ALL + + If this field is nonzero, the root defaults to readonly. The use of + this field is deprecated; use the "ro" or "rw" options on the + command line instead. + +Field name: syssize +Type: read +Offset/size: 0x1f4/4 (protocol 2.04+) 0x1f4/2 (protocol ALL) +Protocol: 2.04+ + + The size of the protected-mode code in units of 16-byte paragraphs. + For protocol versions older than 2.04 this field is only two bytes + wide, and therefore cannot be trusted for the size of a kernel if + the LOAD_HIGH flag is set. + +Field name: ram_size +Type: kernel internal +Offset/size: 0x1f8/2 +Protocol: ALL + + This field is obsolete. + +Field name: vid_mode +Type: modify (obligatory) +Offset/size: 0x1fa/2 + + Please see the section on SPECIAL COMMAND LINE OPTIONS. + +Field name: root_dev +Type: modify (optional) +Offset/size: 0x1fc/2 +Protocol: ALL + + The default root device device number. The use of this field is + deprecated, use the "root=" option on the command line instead. + +Field name: boot_flag +Type: read +Offset/size: 0x1fe/2 +Protocol: ALL + + Contains 0xAA55. This is the closest thing old Linux kernels have + to a magic number. + +Field name: jump +Type: read +Offset/size: 0x200/2 +Protocol: 2.00+ + + Contains an x86 jump instruction, 0xEB followed by a signed offset + relative to byte 0x202. This can be used to determine the size of + the header. + +Field name: header +Type: read +Offset/size: 0x202/4 +Protocol: 2.00+ + + Contains the magic number "HdrS" (0x53726448). + +Field name: version +Type: read +Offset/size: 0x206/2 +Protocol: 2.00+ + + Contains the boot protocol version, in (major << 8)+minor format, + e.g. 0x0204 for version 2.04, and 0x0a11 for a hypothetical version + 10.17. + +Field name: readmode_swtch +Type: modify (optional) +Offset/size: 0x208/4 +Protocol: 2.00+ + + Boot loader hook (see ADVANCED BOOT LOADER HOOKS below.) + +Field name: start_sys +Type: read +Offset/size: 0x20c/4 +Protocol: 2.00+ + + The load low segment (0x1000). Obsolete. + +Field name: kernel_version +Type: read +Offset/size: 0x20e/2 +Protocol: 2.00+ + + If set to a nonzero value, contains a pointer to a NUL-terminated + human-readable kernel version number string, less 0x200. This can + be used to display the kernel version to the user. This value + should be less than (0x200*setup_sects). + + For example, if this value is set to 0x1c00, the kernel version + number string can be found at offset 0x1e00 in the kernel file. + This is a valid value if and only if the "setup_sects" field + contains the value 15 or higher, as: + + 0x1c00 < 15*0x200 (= 0x1e00) but + 0x1c00 >= 14*0x200 (= 0x1c00) + + 0x1c00 >> 9 = 14, so the minimum value for setup_secs is 15. + +Field name: type_of_loader +Type: write (obligatory) +Offset/size: 0x210/1 +Protocol: 2.00+ + + If your boot loader has an assigned id (see table below), enter + 0xTV here, where T is an identifier for the boot loader and V is + a version number. Otherwise, enter 0xFF here. + + Assigned boot loader ids: + 0 LILO (0x00 reserved for pre-2.00 bootloader) + 1 Loadlin + 2 bootsect-loader (0x20, all other values reserved) + 3 SYSLINUX + 4 EtherBoot + 5 ELILO + 7 GRuB + 8 U-BOOT + 9 Xen + A Gujin + B Qemu + + Please contact if you need a bootloader ID + value assigned. + +Field name: loadflags +Type: modify (obligatory) +Offset/size: 0x211/1 +Protocol: 2.00+ + + This field is a bitmask. + + Bit 0 (read): LOADED_HIGH + - If 0, the protected-mode code is loaded at 0x10000. + - If 1, the protected-mode code is loaded at 0x100000. + + Bit 5 (write): QUIET_FLAG + - If 0, print early messages. + - If 1, suppress early messages. + This requests to the kernel (decompressor and early + kernel) to not write early messages that require + accessing the display hardware directly. + + Bit 6 (write): KEEP_SEGMENTS + Protocol: 2.07+ + - If 0, reload the segment registers in the 32bit entry point. + - If 1, do not reload the segment registers in the 32bit entry point. + Assume that %cs %ds %ss %es are all set to flat segments with + a base of 0 (or the equivalent for their environment). + + Bit 7 (write): CAN_USE_HEAP + Set this bit to 1 to indicate that the value entered in the + heap_end_ptr is valid. If this field is clear, some setup code + functionality will be disabled. + +Field name: setup_move_size +Type: modify (obligatory) +Offset/size: 0x212/2 +Protocol: 2.00-2.01 + + When using protocol 2.00 or 2.01, if the real mode kernel is not + loaded at 0x90000, it gets moved there later in the loading + sequence. Fill in this field if you want additional data (such as + the kernel command line) moved in addition to the real-mode kernel + itself. + + The unit is bytes starting with the beginning of the boot sector. + + This field is can be ignored when the protocol is 2.02 or higher, or + if the real-mode code is loaded at 0x90000. + +Field name: code32_start +Type: modify (optional, reloc) +Offset/size: 0x214/4 +Protocol: 2.00+ + + The address to jump to in protected mode. This defaults to the load + address of the kernel, and can be used by the boot loader to + determine the proper load address. + + This field can be modified for two purposes: + + 1. as a boot loader hook (see ADVANCED BOOT LOADER HOOKS below.) + + 2. if a bootloader which does not install a hook loads a + relocatable kernel at a nonstandard address it will have to modify + this field to point to the load address. + +Field name: ramdisk_image +Type: write (obligatory) +Offset/size: 0x218/4 +Protocol: 2.00+ + + The 32-bit linear address of the initial ramdisk or ramfs. Leave at + zero if there is no initial ramdisk/ramfs. + +Field name: ramdisk_size +Type: write (obligatory) +Offset/size: 0x21c/4 +Protocol: 2.00+ + + Size of the initial ramdisk or ramfs. Leave at zero if there is no + initial ramdisk/ramfs. + +Field name: bootsect_kludge +Type: kernel internal +Offset/size: 0x220/4 +Protocol: 2.00+ + + This field is obsolete. + +Field name: heap_end_ptr +Type: write (obligatory) +Offset/size: 0x224/2 +Protocol: 2.01+ + + Set this field to the offset (from the beginning of the real-mode + code) of the end of the setup stack/heap, minus 0x0200. + +Field name: cmd_line_ptr +Type: write (obligatory) +Offset/size: 0x228/4 +Protocol: 2.02+ + + Set this field to the linear address of the kernel command line. + The kernel command line can be located anywhere between the end of + the setup heap and 0xA0000; it does not have to be located in the + same 64K segment as the real-mode code itself. + + Fill in this field even if your boot loader does not support a + command line, in which case you can point this to an empty string + (or better yet, to the string "auto".) If this field is left at + zero, the kernel will assume that your boot loader does not support + the 2.02+ protocol. + +Field name: initrd_addr_max +Type: read +Offset/size: 0x22c/4 +Protocol: 2.03+ + + The maximum address that may be occupied by the initial + ramdisk/ramfs contents. For boot protocols 2.02 or earlier, this + field is not present, and the maximum address is 0x37FFFFFF. (This + address is defined as the address of the highest safe byte, so if + your ramdisk is exactly 131072 bytes long and this field is + 0x37FFFFFF, you can start your ramdisk at 0x37FE0000.) + +Field name: kernel_alignment +Type: read (reloc) +Offset/size: 0x230/4 +Protocol: 2.05+ + + Alignment unit required by the kernel (if relocatable_kernel is true.) + +Field name: relocatable_kernel +Type: read (reloc) +Offset/size: 0x234/1 +Protocol: 2.05+ + + If this field is nonzero, the protected-mode part of the kernel can + be loaded at any address that satisfies the kernel_alignment field. + After loading, the boot loader must set the code32_start field to + point to the loaded code, or to a boot loader hook. + +Field name: cmdline_size +Type: read +Offset/size: 0x238/4 +Protocol: 2.06+ + + The maximum size of the command line without the terminating + zero. This means that the command line can contain at most + cmdline_size characters. With protocol version 2.05 and earlier, the + maximum size was 255. + +Field name: hardware_subarch +Type: write (optional, defaults to x86/PC) +Offset/size: 0x23c/4 +Protocol: 2.07+ + + In a paravirtualized environment the hardware low level architectural + pieces such as interrupt handling, page table handling, and + accessing process control registers needs to be done differently. + + This field allows the bootloader to inform the kernel we are in one + one of those environments. + + 0x00000000 The default x86/PC environment + 0x00000001 lguest + 0x00000002 Xen + +Field name: hardware_subarch_data +Type: write (subarch-dependent) +Offset/size: 0x240/8 +Protocol: 2.07+ + + A pointer to data that is specific to hardware subarch + This field is currently unused for the default x86/PC environment, + do not modify. + +Field name: payload_offset +Type: read +Offset/size: 0x248/4 +Protocol: 2.08+ + + If non-zero then this field contains the offset from the end of the + real-mode code to the payload. + + The payload may be compressed. The format of both the compressed and + uncompressed data should be determined using the standard magic + numbers. Currently only gzip compressed ELF is used. + +Field name: payload_length +Type: read +Offset/size: 0x24c/4 +Protocol: 2.08+ + + The length of the payload. + +Field name: setup_data +Type: write (special) +Offset/size: 0x250/8 +Protocol: 2.09+ + + The 64-bit physical pointer to NULL terminated single linked list of + struct setup_data. This is used to define a more extensible boot + parameters passing mechanism. The definition of struct setup_data is + as follow: + + struct setup_data { + u64 next; + u32 type; + u32 len; + u8 data[0]; + }; + + Where, the next is a 64-bit physical pointer to the next node of + linked list, the next field of the last node is 0; the type is used + to identify the contents of data; the len is the length of data + field; the data holds the real payload. + + This list may be modified at a number of points during the bootup + process. Therefore, when modifying this list one should always make + sure to consider the case where the linked list already contains + entries. + + +**** THE IMAGE CHECKSUM + +From boot protocol version 2.08 onwards the CRC-32 is calculated over +the entire file using the characteristic polynomial 0x04C11DB7 and an +initial remainder of 0xffffffff. The checksum is appended to the +file; therefore the CRC of the file up to the limit specified in the +syssize field of the header is always 0. + + +**** THE KERNEL COMMAND LINE + +The kernel command line has become an important way for the boot +loader to communicate with the kernel. Some of its options are also +relevant to the boot loader itself, see "special command line options" +below. + +The kernel command line is a null-terminated string. The maximum +length can be retrieved from the field cmdline_size. Before protocol +version 2.06, the maximum was 255 characters. A string that is too +long will be automatically truncated by the kernel. + +If the boot protocol version is 2.02 or later, the address of the +kernel command line is given by the header field cmd_line_ptr (see +above.) This address can be anywhere between the end of the setup +heap and 0xA0000. + +If the protocol version is *not* 2.02 or higher, the kernel +command line is entered using the following protocol: + + At offset 0x0020 (word), "cmd_line_magic", enter the magic + number 0xA33F. + + At offset 0x0022 (word), "cmd_line_offset", enter the offset + of the kernel command line (relative to the start of the + real-mode kernel). + + The kernel command line *must* be within the memory region + covered by setup_move_size, so you may need to adjust this + field. + + +**** MEMORY LAYOUT OF THE REAL-MODE CODE + +The real-mode code requires a stack/heap to be set up, as well as +memory allocated for the kernel command line. This needs to be done +in the real-mode accessible memory in bottom megabyte. + +It should be noted that modern machines often have a sizable Extended +BIOS Data Area (EBDA). As a result, it is advisable to use as little +of the low megabyte as possible. + +Unfortunately, under the following circumstances the 0x90000 memory +segment has to be used: + + - When loading a zImage kernel ((loadflags & 0x01) == 0). + - When loading a 2.01 or earlier boot protocol kernel. + + -> For the 2.00 and 2.01 boot protocols, the real-mode code + can be loaded at another address, but it is internally + relocated to 0x90000. For the "old" protocol, the + real-mode code must be loaded at 0x90000. + +When loading at 0x90000, avoid using memory above 0x9a000. + +For boot protocol 2.02 or higher, the command line does not have to be +located in the same 64K segment as the real-mode setup code; it is +thus permitted to give the stack/heap the full 64K segment and locate +the command line above it. + +The kernel command line should not be located below the real-mode +code, nor should it be located in high memory. + + +**** SAMPLE BOOT CONFIGURATION + +As a sample configuration, assume the following layout of the real +mode segment: + + When loading below 0x90000, use the entire segment: + + 0x0000-0x7fff Real mode kernel + 0x8000-0xdfff Stack and heap + 0xe000-0xffff Kernel command line + + When loading at 0x90000 OR the protocol version is 2.01 or earlier: + + 0x0000-0x7fff Real mode kernel + 0x8000-0x97ff Stack and heap + 0x9800-0x9fff Kernel command line + +Such a boot loader should enter the following fields in the header: + + unsigned long base_ptr; /* base address for real-mode segment */ + + if ( setup_sects == 0 ) { + setup_sects = 4; + } + + if ( protocol >= 0x0200 ) { + type_of_loader = ; + if ( loading_initrd ) { + ramdisk_image = ; + ramdisk_size = ; + } + + if ( protocol >= 0x0202 && loadflags & 0x01 ) + heap_end = 0xe000; + else + heap_end = 0x9800; + + if ( protocol >= 0x0201 ) { + heap_end_ptr = heap_end - 0x200; + loadflags |= 0x80; /* CAN_USE_HEAP */ + } + + if ( protocol >= 0x0202 ) { + cmd_line_ptr = base_ptr + heap_end; + strcpy(cmd_line_ptr, cmdline); + } else { + cmd_line_magic = 0xA33F; + cmd_line_offset = heap_end; + setup_move_size = heap_end + strlen(cmdline)+1; + strcpy(base_ptr+cmd_line_offset, cmdline); + } + } else { + /* Very old kernel */ + + heap_end = 0x9800; + + cmd_line_magic = 0xA33F; + cmd_line_offset = heap_end; + + /* A very old kernel MUST have its real-mode code + loaded at 0x90000 */ + + if ( base_ptr != 0x90000 ) { + /* Copy the real-mode kernel */ + memcpy(0x90000, base_ptr, (setup_sects+1)*512); + base_ptr = 0x90000; /* Relocated */ + } + + strcpy(0x90000+cmd_line_offset, cmdline); + + /* It is recommended to clear memory up to the 32K mark */ + memset(0x90000 + (setup_sects+1)*512, 0, + (64-(setup_sects+1))*512); + } + + +**** LOADING THE REST OF THE KERNEL + +The 32-bit (non-real-mode) kernel starts at offset (setup_sects+1)*512 +in the kernel file (again, if setup_sects == 0 the real value is 4.) +It should be loaded at address 0x10000 for Image/zImage kernels and +0x100000 for bzImage kernels. + +The kernel is a bzImage kernel if the protocol >= 2.00 and the 0x01 +bit (LOAD_HIGH) in the loadflags field is set: + + is_bzImage = (protocol >= 0x0200) && (loadflags & 0x01); + load_address = is_bzImage ? 0x100000 : 0x10000; + +Note that Image/zImage kernels can be up to 512K in size, and thus use +the entire 0x10000-0x90000 range of memory. This means it is pretty +much a requirement for these kernels to load the real-mode part at +0x90000. bzImage kernels allow much more flexibility. + + +**** SPECIAL COMMAND LINE OPTIONS + +If the command line provided by the boot loader is entered by the +user, the user may expect the following command line options to work. +They should normally not be deleted from the kernel command line even +though not all of them are actually meaningful to the kernel. Boot +loader authors who need additional command line options for the boot +loader itself should get them registered in +Documentation/kernel-parameters.txt to make sure they will not +conflict with actual kernel options now or in the future. + + vga= + here is either an integer (in C notation, either + decimal, octal, or hexadecimal) or one of the strings + "normal" (meaning 0xFFFF), "ext" (meaning 0xFFFE) or "ask" + (meaning 0xFFFD). This value should be entered into the + vid_mode field, as it is used by the kernel before the command + line is parsed. + + mem= + is an integer in C notation optionally followed by + (case insensitive) K, M, G, T, P or E (meaning << 10, << 20, + << 30, << 40, << 50 or << 60). This specifies the end of + memory to the kernel. This affects the possible placement of + an initrd, since an initrd should be placed near end of + memory. Note that this is an option to *both* the kernel and + the bootloader! + + initrd= + An initrd should be loaded. The meaning of is + obviously bootloader-dependent, and some boot loaders + (e.g. LILO) do not have such a command. + +In addition, some boot loaders add the following options to the +user-specified command line: + + BOOT_IMAGE= + The boot image which was loaded. Again, the meaning of + is obviously bootloader-dependent. + + auto + The kernel was booted without explicit user intervention. + +If these options are added by the boot loader, it is highly +recommended that they are located *first*, before the user-specified +or configuration-specified command line. Otherwise, "init=/bin/sh" +gets confused by the "auto" option. + + +**** RUNNING THE KERNEL + +The kernel is started by jumping to the kernel entry point, which is +located at *segment* offset 0x20 from the start of the real mode +kernel. This means that if you loaded your real-mode kernel code at +0x90000, the kernel entry point is 9020:0000. + +At entry, ds = es = ss should point to the start of the real-mode +kernel code (0x9000 if the code is loaded at 0x90000), sp should be +set up properly, normally pointing to the top of the heap, and +interrupts should be disabled. Furthermore, to guard against bugs in +the kernel, it is recommended that the boot loader sets fs = gs = ds = +es = ss. + +In our example from above, we would do: + + /* Note: in the case of the "old" kernel protocol, base_ptr must + be == 0x90000 at this point; see the previous sample code */ + + seg = base_ptr >> 4; + + cli(); /* Enter with interrupts disabled! */ + + /* Set up the real-mode kernel stack */ + _SS = seg; + _SP = heap_end; + + _DS = _ES = _FS = _GS = seg; + jmp_far(seg+0x20, 0); /* Run the kernel */ + +If your boot sector accesses a floppy drive, it is recommended to +switch off the floppy motor before running the kernel, since the +kernel boot leaves interrupts off and thus the motor will not be +switched off, especially if the loaded kernel has the floppy driver as +a demand-loaded module! + + +**** ADVANCED BOOT LOADER HOOKS + +If the boot loader runs in a particularly hostile environment (such as +LOADLIN, which runs under DOS) it may be impossible to follow the +standard memory location requirements. Such a boot loader may use the +following hooks that, if set, are invoked by the kernel at the +appropriate time. The use of these hooks should probably be +considered an absolutely last resort! + +IMPORTANT: All the hooks are required to preserve %esp, %ebp, %esi and +%edi across invocation. + + realmode_swtch: + A 16-bit real mode far subroutine invoked immediately before + entering protected mode. The default routine disables NMI, so + your routine should probably do so, too. + + code32_start: + A 32-bit flat-mode routine *jumped* to immediately after the + transition to protected mode, but before the kernel is + uncompressed. No segments, except CS, are guaranteed to be + set up (current kernels do, but older ones do not); you should + set them up to BOOT_DS (0x18) yourself. + + After completing your hook, you should jump to the address + that was in this field before your boot loader overwrote it + (relocated, if appropriate.) + + +**** 32-bit BOOT PROTOCOL + +For machine with some new BIOS other than legacy BIOS, such as EFI, +LinuxBIOS, etc, and kexec, the 16-bit real mode setup code in kernel +based on legacy BIOS can not be used, so a 32-bit boot protocol needs +to be defined. + +In 32-bit boot protocol, the first step in loading a Linux kernel +should be to setup the boot parameters (struct boot_params, +traditionally known as "zero page"). The memory for struct boot_params +should be allocated and initialized to all zero. Then the setup header +from offset 0x01f1 of kernel image on should be loaded into struct +boot_params and examined. The end of setup header can be calculated as +follow: + + 0x0202 + byte value at offset 0x0201 + +In addition to read/modify/write the setup header of the struct +boot_params as that of 16-bit boot protocol, the boot loader should +also fill the additional fields of the struct boot_params as that +described in zero-page.txt. + +After setupping the struct boot_params, the boot loader can load the +32/64-bit kernel in the same way as that of 16-bit boot protocol. + +In 32-bit boot protocol, the kernel is started by jumping to the +32-bit kernel entry point, which is the start address of loaded +32/64-bit kernel. + +At entry, the CPU must be in 32-bit protected mode with paging +disabled; a GDT must be loaded with the descriptors for selectors +__BOOT_CS(0x10) and __BOOT_DS(0x18); both descriptors must be 4G flat +segment; __BOOS_CS must have execute/read permission, and __BOOT_DS +must have read/write permission; CS must be __BOOT_CS and DS, ES, SS +must be __BOOT_DS; interrupt must be disabled; %esi must hold the base +address of the struct boot_params; %ebp, %edi and %ebx must be zero. diff --git a/Documentation/x86/i386/boot.txt b/Documentation/x86/i386/boot.txt deleted file mode 100644 index 147bfe5..0000000 --- a/Documentation/x86/i386/boot.txt +++ /dev/null @@ -1,900 +0,0 @@ - THE LINUX/x86 BOOT PROTOCOL - --------------------------- - -On the x86 platform, the Linux kernel uses a rather complicated boot -convention. This has evolved partially due to historical aspects, as -well as the desire in the early days to have the kernel itself be a -bootable image, the complicated PC memory model and due to changed -expectations in the PC industry caused by the effective demise of -real-mode DOS as a mainstream operating system. - -Currently, the following versions of the Linux/x86 boot protocol exist. - -Old kernels: zImage/Image support only. Some very early kernels - may not even support a command line. - -Protocol 2.00: (Kernel 1.3.73) Added bzImage and initrd support, as - well as a formalized way to communicate between the - boot loader and the kernel. setup.S made relocatable, - although the traditional setup area still assumed - writable. - -Protocol 2.01: (Kernel 1.3.76) Added a heap overrun warning. - -Protocol 2.02: (Kernel 2.4.0-test3-pre3) New command line protocol. - Lower the conventional memory ceiling. No overwrite - of the traditional setup area, thus making booting - safe for systems which use the EBDA from SMM or 32-bit - BIOS entry points. zImage deprecated but still - supported. - -Protocol 2.03: (Kernel 2.4.18-pre1) Explicitly makes the highest possible - initrd address available to the bootloader. - -Protocol 2.04: (Kernel 2.6.14) Extend the syssize field to four bytes. - -Protocol 2.05: (Kernel 2.6.20) Make protected mode kernel relocatable. - Introduce relocatable_kernel and kernel_alignment fields. - -Protocol 2.06: (Kernel 2.6.22) Added a field that contains the size of - the boot command line. - -Protocol 2.07: (Kernel 2.6.24) Added paravirtualised boot protocol. - Introduced hardware_subarch and hardware_subarch_data - and KEEP_SEGMENTS flag in load_flags. - -Protocol 2.08: (Kernel 2.6.26) Added crc32 checksum and ELF format - payload. Introduced payload_offset and payload length - fields to aid in locating the payload. - -Protocol 2.09: (Kernel 2.6.26) Added a field of 64-bit physical - pointer to single linked list of struct setup_data. - -**** MEMORY LAYOUT - -The traditional memory map for the kernel loader, used for Image or -zImage kernels, typically looks like: - - | | -0A0000 +------------------------+ - | Reserved for BIOS | Do not use. Reserved for BIOS EBDA. -09A000 +------------------------+ - | Command line | - | Stack/heap | For use by the kernel real-mode code. -098000 +------------------------+ - | Kernel setup | The kernel real-mode code. -090200 +------------------------+ - | Kernel boot sector | The kernel legacy boot sector. -090000 +------------------------+ - | Protected-mode kernel | The bulk of the kernel image. -010000 +------------------------+ - | Boot loader | <- Boot sector entry point 0000:7C00 -001000 +------------------------+ - | Reserved for MBR/BIOS | -000800 +------------------------+ - | Typically used by MBR | -000600 +------------------------+ - | BIOS use only | -000000 +------------------------+ - - -When using bzImage, the protected-mode kernel was relocated to -0x100000 ("high memory"), and the kernel real-mode block (boot sector, -setup, and stack/heap) was made relocatable to any address between -0x10000 and end of low memory. Unfortunately, in protocols 2.00 and -2.01 the 0x90000+ memory range is still used internally by the kernel; -the 2.02 protocol resolves that problem. - -It is desirable to keep the "memory ceiling" -- the highest point in -low memory touched by the boot loader -- as low as possible, since -some newer BIOSes have begun to allocate some rather large amounts of -memory, called the Extended BIOS Data Area, near the top of low -memory. The boot loader should use the "INT 12h" BIOS call to verify -how much low memory is available. - -Unfortunately, if INT 12h reports that the amount of memory is too -low, there is usually nothing the boot loader can do but to report an -error to the user. The boot loader should therefore be designed to -take up as little space in low memory as it reasonably can. For -zImage or old bzImage kernels, which need data written into the -0x90000 segment, the boot loader should make sure not to use memory -above the 0x9A000 point; too many BIOSes will break above that point. - -For a modern bzImage kernel with boot protocol version >= 2.02, a -memory layout like the following is suggested: - - ~ ~ - | Protected-mode kernel | -100000 +------------------------+ - | I/O memory hole | -0A0000 +------------------------+ - | Reserved for BIOS | Leave as much as possible unused - ~ ~ - | Command line | (Can also be below the X+10000 mark) -X+10000 +------------------------+ - | Stack/heap | For use by the kernel real-mode code. -X+08000 +------------------------+ - | Kernel setup | The kernel real-mode code. - | Kernel boot sector | The kernel legacy boot sector. -X +------------------------+ - | Boot loader | <- Boot sector entry point 0000:7C00 -001000 +------------------------+ - | Reserved for MBR/BIOS | -000800 +------------------------+ - | Typically used by MBR | -000600 +------------------------+ - | BIOS use only | -000000 +------------------------+ - -... where the address X is as low as the design of the boot loader -permits. - - -**** THE REAL-MODE KERNEL HEADER - -In the following text, and anywhere in the kernel boot sequence, "a -sector" refers to 512 bytes. It is independent of the actual sector -size of the underlying medium. - -The first step in loading a Linux kernel should be to load the -real-mode code (boot sector and setup code) and then examine the -following header at offset 0x01f1. The real-mode code can total up to -32K, although the boot loader may choose to load only the first two -sectors (1K) and then examine the bootup sector size. - -The header looks like: - -Offset Proto Name Meaning -/Size - -01F1/1 ALL(1 setup_sects The size of the setup in sectors -01F2/2 ALL root_flags If set, the root is mounted readonly -01F4/4 2.04+(2 syssize The size of the 32-bit code in 16-byte paras -01F8/2 ALL ram_size DO NOT USE - for bootsect.S use only -01FA/2 ALL vid_mode Video mode control -01FC/2 ALL root_dev Default root device number -01FE/2 ALL boot_flag 0xAA55 magic number -0200/2 2.00+ jump Jump instruction -0202/4 2.00+ header Magic signature "HdrS" -0206/2 2.00+ version Boot protocol version supported -0208/4 2.00+ realmode_swtch Boot loader hook (see below) -020C/2 2.00+ start_sys The load-low segment (0x1000) (obsolete) -020E/2 2.00+ kernel_version Pointer to kernel version string -0210/1 2.00+ type_of_loader Boot loader identifier -0211/1 2.00+ loadflags Boot protocol option flags -0212/2 2.00+ setup_move_size Move to high memory size (used with hooks) -0214/4 2.00+ code32_start Boot loader hook (see below) -0218/4 2.00+ ramdisk_image initrd load address (set by boot loader) -021C/4 2.00+ ramdisk_size initrd size (set by boot loader) -0220/4 2.00+ bootsect_kludge DO NOT USE - for bootsect.S use only -0224/2 2.01+ heap_end_ptr Free memory after setup end -0226/2 N/A pad1 Unused -0228/4 2.02+ cmd_line_ptr 32-bit pointer to the kernel command line -022C/4 2.03+ initrd_addr_max Highest legal initrd address -0230/4 2.05+ kernel_alignment Physical addr alignment required for kernel -0234/1 2.05+ relocatable_kernel Whether kernel is relocatable or not -0235/3 N/A pad2 Unused -0238/4 2.06+ cmdline_size Maximum size of the kernel command line -023C/4 2.07+ hardware_subarch Hardware subarchitecture -0240/8 2.07+ hardware_subarch_data Subarchitecture-specific data -0248/4 2.08+ payload_offset Offset of kernel payload -024C/4 2.08+ payload_length Length of kernel payload -0250/8 2.09+ setup_data 64-bit physical pointer to linked list - of struct setup_data - -(1) For backwards compatibility, if the setup_sects field contains 0, the - real value is 4. - -(2) For boot protocol prior to 2.04, the upper two bytes of the syssize - field are unusable, which means the size of a bzImage kernel - cannot be determined. - -If the "HdrS" (0x53726448) magic number is not found at offset 0x202, -the boot protocol version is "old". Loading an old kernel, the -following parameters should be assumed: - - Image type = zImage - initrd not supported - Real-mode kernel must be located at 0x90000. - -Otherwise, the "version" field contains the protocol version, -e.g. protocol version 2.01 will contain 0x0201 in this field. When -setting fields in the header, you must make sure only to set fields -supported by the protocol version in use. - - -**** DETAILS OF HEADER FIELDS - -For each field, some are information from the kernel to the bootloader -("read"), some are expected to be filled out by the bootloader -("write"), and some are expected to be read and modified by the -bootloader ("modify"). - -All general purpose boot loaders should write the fields marked -(obligatory). Boot loaders who want to load the kernel at a -nonstandard address should fill in the fields marked (reloc); other -boot loaders can ignore those fields. - -The byte order of all fields is littleendian (this is x86, after all.) - -Field name: setup_sects -Type: read -Offset/size: 0x1f1/1 -Protocol: ALL - - The size of the setup code in 512-byte sectors. If this field is - 0, the real value is 4. The real-mode code consists of the boot - sector (always one 512-byte sector) plus the setup code. - -Field name: root_flags -Type: modify (optional) -Offset/size: 0x1f2/2 -Protocol: ALL - - If this field is nonzero, the root defaults to readonly. The use of - this field is deprecated; use the "ro" or "rw" options on the - command line instead. - -Field name: syssize -Type: read -Offset/size: 0x1f4/4 (protocol 2.04+) 0x1f4/2 (protocol ALL) -Protocol: 2.04+ - - The size of the protected-mode code in units of 16-byte paragraphs. - For protocol versions older than 2.04 this field is only two bytes - wide, and therefore cannot be trusted for the size of a kernel if - the LOAD_HIGH flag is set. - -Field name: ram_size -Type: kernel internal -Offset/size: 0x1f8/2 -Protocol: ALL - - This field is obsolete. - -Field name: vid_mode -Type: modify (obligatory) -Offset/size: 0x1fa/2 - - Please see the section on SPECIAL COMMAND LINE OPTIONS. - -Field name: root_dev -Type: modify (optional) -Offset/size: 0x1fc/2 -Protocol: ALL - - The default root device device number. The use of this field is - deprecated, use the "root=" option on the command line instead. - -Field name: boot_flag -Type: read -Offset/size: 0x1fe/2 -Protocol: ALL - - Contains 0xAA55. This is the closest thing old Linux kernels have - to a magic number. - -Field name: jump -Type: read -Offset/size: 0x200/2 -Protocol: 2.00+ - - Contains an x86 jump instruction, 0xEB followed by a signed offset - relative to byte 0x202. This can be used to determine the size of - the header. - -Field name: header -Type: read -Offset/size: 0x202/4 -Protocol: 2.00+ - - Contains the magic number "HdrS" (0x53726448). - -Field name: version -Type: read -Offset/size: 0x206/2 -Protocol: 2.00+ - - Contains the boot protocol version, in (major << 8)+minor format, - e.g. 0x0204 for version 2.04, and 0x0a11 for a hypothetical version - 10.17. - -Field name: readmode_swtch -Type: modify (optional) -Offset/size: 0x208/4 -Protocol: 2.00+ - - Boot loader hook (see ADVANCED BOOT LOADER HOOKS below.) - -Field name: start_sys -Type: read -Offset/size: 0x20c/4 -Protocol: 2.00+ - - The load low segment (0x1000). Obsolete. - -Field name: kernel_version -Type: read -Offset/size: 0x20e/2 -Protocol: 2.00+ - - If set to a nonzero value, contains a pointer to a NUL-terminated - human-readable kernel version number string, less 0x200. This can - be used to display the kernel version to the user. This value - should be less than (0x200*setup_sects). - - For example, if this value is set to 0x1c00, the kernel version - number string can be found at offset 0x1e00 in the kernel file. - This is a valid value if and only if the "setup_sects" field - contains the value 15 or higher, as: - - 0x1c00 < 15*0x200 (= 0x1e00) but - 0x1c00 >= 14*0x200 (= 0x1c00) - - 0x1c00 >> 9 = 14, so the minimum value for setup_secs is 15. - -Field name: type_of_loader -Type: write (obligatory) -Offset/size: 0x210/1 -Protocol: 2.00+ - - If your boot loader has an assigned id (see table below), enter - 0xTV here, where T is an identifier for the boot loader and V is - a version number. Otherwise, enter 0xFF here. - - Assigned boot loader ids: - 0 LILO (0x00 reserved for pre-2.00 bootloader) - 1 Loadlin - 2 bootsect-loader (0x20, all other values reserved) - 3 SYSLINUX - 4 EtherBoot - 5 ELILO - 7 GRuB - 8 U-BOOT - 9 Xen - A Gujin - B Qemu - - Please contact if you need a bootloader ID - value assigned. - -Field name: loadflags -Type: modify (obligatory) -Offset/size: 0x211/1 -Protocol: 2.00+ - - This field is a bitmask. - - Bit 0 (read): LOADED_HIGH - - If 0, the protected-mode code is loaded at 0x10000. - - If 1, the protected-mode code is loaded at 0x100000. - - Bit 5 (write): QUIET_FLAG - - If 0, print early messages. - - If 1, suppress early messages. - This requests to the kernel (decompressor and early - kernel) to not write early messages that require - accessing the display hardware directly. - - Bit 6 (write): KEEP_SEGMENTS - Protocol: 2.07+ - - If 0, reload the segment registers in the 32bit entry point. - - If 1, do not reload the segment registers in the 32bit entry point. - Assume that %cs %ds %ss %es are all set to flat segments with - a base of 0 (or the equivalent for their environment). - - Bit 7 (write): CAN_USE_HEAP - Set this bit to 1 to indicate that the value entered in the - heap_end_ptr is valid. If this field is clear, some setup code - functionality will be disabled. - -Field name: setup_move_size -Type: modify (obligatory) -Offset/size: 0x212/2 -Protocol: 2.00-2.01 - - When using protocol 2.00 or 2.01, if the real mode kernel is not - loaded at 0x90000, it gets moved there later in the loading - sequence. Fill in this field if you want additional data (such as - the kernel command line) moved in addition to the real-mode kernel - itself. - - The unit is bytes starting with the beginning of the boot sector. - - This field is can be ignored when the protocol is 2.02 or higher, or - if the real-mode code is loaded at 0x90000. - -Field name: code32_start -Type: modify (optional, reloc) -Offset/size: 0x214/4 -Protocol: 2.00+ - - The address to jump to in protected mode. This defaults to the load - address of the kernel, and can be used by the boot loader to - determine the proper load address. - - This field can be modified for two purposes: - - 1. as a boot loader hook (see ADVANCED BOOT LOADER HOOKS below.) - - 2. if a bootloader which does not install a hook loads a - relocatable kernel at a nonstandard address it will have to modify - this field to point to the load address. - -Field name: ramdisk_image -Type: write (obligatory) -Offset/size: 0x218/4 -Protocol: 2.00+ - - The 32-bit linear address of the initial ramdisk or ramfs. Leave at - zero if there is no initial ramdisk/ramfs. - -Field name: ramdisk_size -Type: write (obligatory) -Offset/size: 0x21c/4 -Protocol: 2.00+ - - Size of the initial ramdisk or ramfs. Leave at zero if there is no - initial ramdisk/ramfs. - -Field name: bootsect_kludge -Type: kernel internal -Offset/size: 0x220/4 -Protocol: 2.00+ - - This field is obsolete. - -Field name: heap_end_ptr -Type: write (obligatory) -Offset/size: 0x224/2 -Protocol: 2.01+ - - Set this field to the offset (from the beginning of the real-mode - code) of the end of the setup stack/heap, minus 0x0200. - -Field name: cmd_line_ptr -Type: write (obligatory) -Offset/size: 0x228/4 -Protocol: 2.02+ - - Set this field to the linear address of the kernel command line. - The kernel command line can be located anywhere between the end of - the setup heap and 0xA0000; it does not have to be located in the - same 64K segment as the real-mode code itself. - - Fill in this field even if your boot loader does not support a - command line, in which case you can point this to an empty string - (or better yet, to the string "auto".) If this field is left at - zero, the kernel will assume that your boot loader does not support - the 2.02+ protocol. - -Field name: initrd_addr_max -Type: read -Offset/size: 0x22c/4 -Protocol: 2.03+ - - The maximum address that may be occupied by the initial - ramdisk/ramfs contents. For boot protocols 2.02 or earlier, this - field is not present, and the maximum address is 0x37FFFFFF. (This - address is defined as the address of the highest safe byte, so if - your ramdisk is exactly 131072 bytes long and this field is - 0x37FFFFFF, you can start your ramdisk at 0x37FE0000.) - -Field name: kernel_alignment -Type: read (reloc) -Offset/size: 0x230/4 -Protocol: 2.05+ - - Alignment unit required by the kernel (if relocatable_kernel is true.) - -Field name: relocatable_kernel -Type: read (reloc) -Offset/size: 0x234/1 -Protocol: 2.05+ - - If this field is nonzero, the protected-mode part of the kernel can - be loaded at any address that satisfies the kernel_alignment field. - After loading, the boot loader must set the code32_start field to - point to the loaded code, or to a boot loader hook. - -Field name: cmdline_size -Type: read -Offset/size: 0x238/4 -Protocol: 2.06+ - - The maximum size of the command line without the terminating - zero. This means that the command line can contain at most - cmdline_size characters. With protocol version 2.05 and earlier, the - maximum size was 255. - -Field name: hardware_subarch -Type: write (optional, defaults to x86/PC) -Offset/size: 0x23c/4 -Protocol: 2.07+ - - In a paravirtualized environment the hardware low level architectural - pieces such as interrupt handling, page table handling, and - accessing process control registers needs to be done differently. - - This field allows the bootloader to inform the kernel we are in one - one of those environments. - - 0x00000000 The default x86/PC environment - 0x00000001 lguest - 0x00000002 Xen - -Field name: hardware_subarch_data -Type: write (subarch-dependent) -Offset/size: 0x240/8 -Protocol: 2.07+ - - A pointer to data that is specific to hardware subarch - This field is currently unused for the default x86/PC environment, - do not modify. - -Field name: payload_offset -Type: read -Offset/size: 0x248/4 -Protocol: 2.08+ - - If non-zero then this field contains the offset from the end of the - real-mode code to the payload. - - The payload may be compressed. The format of both the compressed and - uncompressed data should be determined using the standard magic - numbers. Currently only gzip compressed ELF is used. - -Field name: payload_length -Type: read -Offset/size: 0x24c/4 -Protocol: 2.08+ - - The length of the payload. - -Field name: setup_data -Type: write (special) -Offset/size: 0x250/8 -Protocol: 2.09+ - - The 64-bit physical pointer to NULL terminated single linked list of - struct setup_data. This is used to define a more extensible boot - parameters passing mechanism. The definition of struct setup_data is - as follow: - - struct setup_data { - u64 next; - u32 type; - u32 len; - u8 data[0]; - }; - - Where, the next is a 64-bit physical pointer to the next node of - linked list, the next field of the last node is 0; the type is used - to identify the contents of data; the len is the length of data - field; the data holds the real payload. - - This list may be modified at a number of points during the bootup - process. Therefore, when modifying this list one should always make - sure to consider the case where the linked list already contains - entries. - - -**** THE IMAGE CHECKSUM - -From boot protocol version 2.08 onwards the CRC-32 is calculated over -the entire file using the characteristic polynomial 0x04C11DB7 and an -initial remainder of 0xffffffff. The checksum is appended to the -file; therefore the CRC of the file up to the limit specified in the -syssize field of the header is always 0. - - -**** THE KERNEL COMMAND LINE - -The kernel command line has become an important way for the boot -loader to communicate with the kernel. Some of its options are also -relevant to the boot loader itself, see "special command line options" -below. - -The kernel command line is a null-terminated string. The maximum -length can be retrieved from the field cmdline_size. Before protocol -version 2.06, the maximum was 255 characters. A string that is too -long will be automatically truncated by the kernel. - -If the boot protocol version is 2.02 or later, the address of the -kernel command line is given by the header field cmd_line_ptr (see -above.) This address can be anywhere between the end of the setup -heap and 0xA0000. - -If the protocol version is *not* 2.02 or higher, the kernel -command line is entered using the following protocol: - - At offset 0x0020 (word), "cmd_line_magic", enter the magic - number 0xA33F. - - At offset 0x0022 (word), "cmd_line_offset", enter the offset - of the kernel command line (relative to the start of the - real-mode kernel). - - The kernel command line *must* be within the memory region - covered by setup_move_size, so you may need to adjust this - field. - - -**** MEMORY LAYOUT OF THE REAL-MODE CODE - -The real-mode code requires a stack/heap to be set up, as well as -memory allocated for the kernel command line. This needs to be done -in the real-mode accessible memory in bottom megabyte. - -It should be noted that modern machines often have a sizable Extended -BIOS Data Area (EBDA). As a result, it is advisable to use as little -of the low megabyte as possible. - -Unfortunately, under the following circumstances the 0x90000 memory -segment has to be used: - - - When loading a zImage kernel ((loadflags & 0x01) == 0). - - When loading a 2.01 or earlier boot protocol kernel. - - -> For the 2.00 and 2.01 boot protocols, the real-mode code - can be loaded at another address, but it is internally - relocated to 0x90000. For the "old" protocol, the - real-mode code must be loaded at 0x90000. - -When loading at 0x90000, avoid using memory above 0x9a000. - -For boot protocol 2.02 or higher, the command line does not have to be -located in the same 64K segment as the real-mode setup code; it is -thus permitted to give the stack/heap the full 64K segment and locate -the command line above it. - -The kernel command line should not be located below the real-mode -code, nor should it be located in high memory. - - -**** SAMPLE BOOT CONFIGURATION - -As a sample configuration, assume the following layout of the real -mode segment: - - When loading below 0x90000, use the entire segment: - - 0x0000-0x7fff Real mode kernel - 0x8000-0xdfff Stack and heap - 0xe000-0xffff Kernel command line - - When loading at 0x90000 OR the protocol version is 2.01 or earlier: - - 0x0000-0x7fff Real mode kernel - 0x8000-0x97ff Stack and heap - 0x9800-0x9fff Kernel command line - -Such a boot loader should enter the following fields in the header: - - unsigned long base_ptr; /* base address for real-mode segment */ - - if ( setup_sects == 0 ) { - setup_sects = 4; - } - - if ( protocol >= 0x0200 ) { - type_of_loader = ; - if ( loading_initrd ) { - ramdisk_image = ; - ramdisk_size = ; - } - - if ( protocol >= 0x0202 && loadflags & 0x01 ) - heap_end = 0xe000; - else - heap_end = 0x9800; - - if ( protocol >= 0x0201 ) { - heap_end_ptr = heap_end - 0x200; - loadflags |= 0x80; /* CAN_USE_HEAP */ - } - - if ( protocol >= 0x0202 ) { - cmd_line_ptr = base_ptr + heap_end; - strcpy(cmd_line_ptr, cmdline); - } else { - cmd_line_magic = 0xA33F; - cmd_line_offset = heap_end; - setup_move_size = heap_end + strlen(cmdline)+1; - strcpy(base_ptr+cmd_line_offset, cmdline); - } - } else { - /* Very old kernel */ - - heap_end = 0x9800; - - cmd_line_magic = 0xA33F; - cmd_line_offset = heap_end; - - /* A very old kernel MUST have its real-mode code - loaded at 0x90000 */ - - if ( base_ptr != 0x90000 ) { - /* Copy the real-mode kernel */ - memcpy(0x90000, base_ptr, (setup_sects+1)*512); - base_ptr = 0x90000; /* Relocated */ - } - - strcpy(0x90000+cmd_line_offset, cmdline); - - /* It is recommended to clear memory up to the 32K mark */ - memset(0x90000 + (setup_sects+1)*512, 0, - (64-(setup_sects+1))*512); - } - - -**** LOADING THE REST OF THE KERNEL - -The 32-bit (non-real-mode) kernel starts at offset (setup_sects+1)*512 -in the kernel file (again, if setup_sects == 0 the real value is 4.) -It should be loaded at address 0x10000 for Image/zImage kernels and -0x100000 for bzImage kernels. - -The kernel is a bzImage kernel if the protocol >= 2.00 and the 0x01 -bit (LOAD_HIGH) in the loadflags field is set: - - is_bzImage = (protocol >= 0x0200) && (loadflags & 0x01); - load_address = is_bzImage ? 0x100000 : 0x10000; - -Note that Image/zImage kernels can be up to 512K in size, and thus use -the entire 0x10000-0x90000 range of memory. This means it is pretty -much a requirement for these kernels to load the real-mode part at -0x90000. bzImage kernels allow much more flexibility. - - -**** SPECIAL COMMAND LINE OPTIONS - -If the command line provided by the boot loader is entered by the -user, the user may expect the following command line options to work. -They should normally not be deleted from the kernel command line even -though not all of them are actually meaningful to the kernel. Boot -loader authors who need additional command line options for the boot -loader itself should get them registered in -Documentation/kernel-parameters.txt to make sure they will not -conflict with actual kernel options now or in the future. - - vga= - here is either an integer (in C notation, either - decimal, octal, or hexadecimal) or one of the strings - "normal" (meaning 0xFFFF), "ext" (meaning 0xFFFE) or "ask" - (meaning 0xFFFD). This value should be entered into the - vid_mode field, as it is used by the kernel before the command - line is parsed. - - mem= - is an integer in C notation optionally followed by - (case insensitive) K, M, G, T, P or E (meaning << 10, << 20, - << 30, << 40, << 50 or << 60). This specifies the end of - memory to the kernel. This affects the possible placement of - an initrd, since an initrd should be placed near end of - memory. Note that this is an option to *both* the kernel and - the bootloader! - - initrd= - An initrd should be loaded. The meaning of is - obviously bootloader-dependent, and some boot loaders - (e.g. LILO) do not have such a command. - -In addition, some boot loaders add the following options to the -user-specified command line: - - BOOT_IMAGE= - The boot image which was loaded. Again, the meaning of - is obviously bootloader-dependent. - - auto - The kernel was booted without explicit user intervention. - -If these options are added by the boot loader, it is highly -recommended that they are located *first*, before the user-specified -or configuration-specified command line. Otherwise, "init=/bin/sh" -gets confused by the "auto" option. - - -**** RUNNING THE KERNEL - -The kernel is started by jumping to the kernel entry point, which is -located at *segment* offset 0x20 from the start of the real mode -kernel. This means that if you loaded your real-mode kernel code at -0x90000, the kernel entry point is 9020:0000. - -At entry, ds = es = ss should point to the start of the real-mode -kernel code (0x9000 if the code is loaded at 0x90000), sp should be -set up properly, normally pointing to the top of the heap, and -interrupts should be disabled. Furthermore, to guard against bugs in -the kernel, it is recommended that the boot loader sets fs = gs = ds = -es = ss. - -In our example from above, we would do: - - /* Note: in the case of the "old" kernel protocol, base_ptr must - be == 0x90000 at this point; see the previous sample code */ - - seg = base_ptr >> 4; - - cli(); /* Enter with interrupts disabled! */ - - /* Set up the real-mode kernel stack */ - _SS = seg; - _SP = heap_end; - - _DS = _ES = _FS = _GS = seg; - jmp_far(seg+0x20, 0); /* Run the kernel */ - -If your boot sector accesses a floppy drive, it is recommended to -switch off the floppy motor before running the kernel, since the -kernel boot leaves interrupts off and thus the motor will not be -switched off, especially if the loaded kernel has the floppy driver as -a demand-loaded module! - - -**** ADVANCED BOOT LOADER HOOKS - -If the boot loader runs in a particularly hostile environment (such as -LOADLIN, which runs under DOS) it may be impossible to follow the -standard memory location requirements. Such a boot loader may use the -following hooks that, if set, are invoked by the kernel at the -appropriate time. The use of these hooks should probably be -considered an absolutely last resort! - -IMPORTANT: All the hooks are required to preserve %esp, %ebp, %esi and -%edi across invocation. - - realmode_swtch: - A 16-bit real mode far subroutine invoked immediately before - entering protected mode. The default routine disables NMI, so - your routine should probably do so, too. - - code32_start: - A 32-bit flat-mode routine *jumped* to immediately after the - transition to protected mode, but before the kernel is - uncompressed. No segments, except CS, are guaranteed to be - set up (current kernels do, but older ones do not); you should - set them up to BOOT_DS (0x18) yourself. - - After completing your hook, you should jump to the address - that was in this field before your boot loader overwrote it - (relocated, if appropriate.) - - -**** 32-bit BOOT PROTOCOL - -For machine with some new BIOS other than legacy BIOS, such as EFI, -LinuxBIOS, etc, and kexec, the 16-bit real mode setup code in kernel -based on legacy BIOS can not be used, so a 32-bit boot protocol needs -to be defined. - -In 32-bit boot protocol, the first step in loading a Linux kernel -should be to setup the boot parameters (struct boot_params, -traditionally known as "zero page"). The memory for struct boot_params -should be allocated and initialized to all zero. Then the setup header -from offset 0x01f1 of kernel image on should be loaded into struct -boot_params and examined. The end of setup header can be calculated as -follow: - - 0x0202 + byte value at offset 0x0201 - -In addition to read/modify/write the setup header of the struct -boot_params as that of 16-bit boot protocol, the boot loader should -also fill the additional fields of the struct boot_params as that -described in zero-page.txt. - -After setupping the struct boot_params, the boot loader can load the -32/64-bit kernel in the same way as that of 16-bit boot protocol. - -In 32-bit boot protocol, the kernel is started by jumping to the -32-bit kernel entry point, which is the start address of loaded -32/64-bit kernel. - -At entry, the CPU must be in 32-bit protected mode with paging -disabled; a GDT must be loaded with the descriptors for selectors -__BOOT_CS(0x10) and __BOOT_DS(0x18); both descriptors must be 4G flat -segment; __BOOS_CS must have execute/read permission, and __BOOT_DS -must have read/write permission; CS must be __BOOT_CS and DS, ES, SS -must be __BOOT_DS; interrupt must be disabled; %esi must hold the base -address of the struct boot_params; %ebp, %edi and %ebx must be zero. diff --git a/Documentation/x86/i386/usb-legacy-support.txt b/Documentation/x86/i386/usb-legacy-support.txt deleted file mode 100644 index 1894cdf..0000000 --- a/Documentation/x86/i386/usb-legacy-support.txt +++ /dev/null @@ -1,44 +0,0 @@ -USB Legacy support -~~~~~~~~~~~~~~~~~~ - -Vojtech Pavlik , January 2004 - - -Also known as "USB Keyboard" or "USB Mouse support" in the BIOS Setup is a -feature that allows one to use the USB mouse and keyboard as if they were -their classic PS/2 counterparts. This means one can use an USB keyboard to -type in LILO for example. - -It has several drawbacks, though: - -1) On some machines, the emulated PS/2 mouse takes over even when no USB - mouse is present and a real PS/2 mouse is present. In that case the extra - features (wheel, extra buttons, touchpad mode) of the real PS/2 mouse may - not be available. - -2) If CONFIG_HIGHMEM64G is enabled, the PS/2 mouse emulation can cause - system crashes, because the SMM BIOS is not expecting to be in PAE mode. - The Intel E7505 is a typical machine where this happens. - -3) If AMD64 64-bit mode is enabled, again system crashes often happen, - because the SMM BIOS isn't expecting the CPU to be in 64-bit mode. The - BIOS manufacturers only test with Windows, and Windows doesn't do 64-bit - yet. - -Solutions: - -Problem 1) can be solved by loading the USB drivers prior to loading the -PS/2 mouse driver. Since the PS/2 mouse driver is in 2.6 compiled into -the kernel unconditionally, this means the USB drivers need to be -compiled-in, too. - -Problem 2) can currently only be solved by either disabling HIGHMEM64G -in the kernel config or USB Legacy support in the BIOS. A BIOS update -could help, but so far no such update exists. - -Problem 3) is usually fixed by a BIOS update. Check the board -manufacturers web site. If an update is not available, disable USB -Legacy support in the BIOS. If this alone doesn't help, try also adding -idle=poll on the kernel command line. The BIOS may be entering the SMM -on the HLT instruction as well. - diff --git a/Documentation/x86/i386/zero-page.txt b/Documentation/x86/i386/zero-page.txt deleted file mode 100644 index 169ad42..0000000 --- a/Documentation/x86/i386/zero-page.txt +++ /dev/null @@ -1,31 +0,0 @@ -The additional fields in struct boot_params as a part of 32-bit boot -protocol of kernel. These should be filled by bootloader or 16-bit -real-mode setup code of the kernel. References/settings to it mainly -are in: - - include/asm-x86/bootparam.h - - -Offset Proto Name Meaning -/Size - -000/040 ALL screen_info Text mode or frame buffer information - (struct screen_info) -040/014 ALL apm_bios_info APM BIOS information (struct apm_bios_info) -060/010 ALL ist_info Intel SpeedStep (IST) BIOS support information - (struct ist_info) -080/010 ALL hd0_info hd0 disk parameter, OBSOLETE!! -090/010 ALL hd1_info hd1 disk parameter, OBSOLETE!! -0A0/010 ALL sys_desc_table System description table (struct sys_desc_table) -140/080 ALL edid_info Video mode setup (struct edid_info) -1C0/020 ALL efi_info EFI 32 information (struct efi_info) -1E0/004 ALL alk_mem_k Alternative mem check, in KB -1E4/004 ALL scratch Scratch field for the kernel setup code -1E8/001 ALL e820_entries Number of entries in e820_map (below) -1E9/001 ALL eddbuf_entries Number of entries in eddbuf (below) -1EA/001 ALL edd_mbr_sig_buf_entries Number of entries in edd_mbr_sig_buffer - (below) -290/040 ALL edd_mbr_sig_buffer EDD MBR signatures -2D0/A00 ALL e820_map E820 memory map table - (array of struct e820entry) -D00/1EC ALL eddbuf EDD data (array of struct edd_info) diff --git a/Documentation/x86/usb-legacy-support.txt b/Documentation/x86/usb-legacy-support.txt new file mode 100644 index 0000000..1894cdf --- /dev/null +++ b/Documentation/x86/usb-legacy-support.txt @@ -0,0 +1,44 @@ +USB Legacy support +~~~~~~~~~~~~~~~~~~ + +Vojtech Pavlik , January 2004 + + +Also known as "USB Keyboard" or "USB Mouse support" in the BIOS Setup is a +feature that allows one to use the USB mouse and keyboard as if they were +their classic PS/2 counterparts. This means one can use an USB keyboard to +type in LILO for example. + +It has several drawbacks, though: + +1) On some machines, the emulated PS/2 mouse takes over even when no USB + mouse is present and a real PS/2 mouse is present. In that case the extra + features (wheel, extra buttons, touchpad mode) of the real PS/2 mouse may + not be available. + +2) If CONFIG_HIGHMEM64G is enabled, the PS/2 mouse emulation can cause + system crashes, because the SMM BIOS is not expecting to be in PAE mode. + The Intel E7505 is a typical machine where this happens. + +3) If AMD64 64-bit mode is enabled, again system crashes often happen, + because the SMM BIOS isn't expecting the CPU to be in 64-bit mode. The + BIOS manufacturers only test with Windows, and Windows doesn't do 64-bit + yet. + +Solutions: + +Problem 1) can be solved by loading the USB drivers prior to loading the +PS/2 mouse driver. Since the PS/2 mouse driver is in 2.6 compiled into +the kernel unconditionally, this means the USB drivers need to be +compiled-in, too. + +Problem 2) can currently only be solved by either disabling HIGHMEM64G +in the kernel config or USB Legacy support in the BIOS. A BIOS update +could help, but so far no such update exists. + +Problem 3) is usually fixed by a BIOS update. Check the board +manufacturers web site. If an update is not available, disable USB +Legacy support in the BIOS. If this alone doesn't help, try also adding +idle=poll on the kernel command line. The BIOS may be entering the SMM +on the HLT instruction as well. + diff --git a/Documentation/x86/zero-page.txt b/Documentation/x86/zero-page.txt new file mode 100644 index 0000000..169ad42 --- /dev/null +++ b/Documentation/x86/zero-page.txt @@ -0,0 +1,31 @@ +The additional fields in struct boot_params as a part of 32-bit boot +protocol of kernel. These should be filled by bootloader or 16-bit +real-mode setup code of the kernel. References/settings to it mainly +are in: + + include/asm-x86/bootparam.h + + +Offset Proto Name Meaning +/Size + +000/040 ALL screen_info Text mode or frame buffer information + (struct screen_info) +040/014 ALL apm_bios_info APM BIOS information (struct apm_bios_info) +060/010 ALL ist_info Intel SpeedStep (IST) BIOS support information + (struct ist_info) +080/010 ALL hd0_info hd0 disk parameter, OBSOLETE!! +090/010 ALL hd1_info hd1 disk parameter, OBSOLETE!! +0A0/010 ALL sys_desc_table System description table (struct sys_desc_table) +140/080 ALL edid_info Video mode setup (struct edid_info) +1C0/020 ALL efi_info EFI 32 information (struct efi_info) +1E0/004 ALL alk_mem_k Alternative mem check, in KB +1E4/004 ALL scratch Scratch field for the kernel setup code +1E8/001 ALL e820_entries Number of entries in e820_map (below) +1E9/001 ALL eddbuf_entries Number of entries in eddbuf (below) +1EA/001 ALL edd_mbr_sig_buf_entries Number of entries in edd_mbr_sig_buffer + (below) +290/040 ALL edd_mbr_sig_buffer EDD MBR signatures +2D0/A00 ALL e820_map E820 memory map table + (array of struct e820entry) +D00/1EC ALL eddbuf EDD data (array of struct edd_info) -- cgit v0.10.2 From a021e5124a6c57325ffb02a60cd1d5f40342f8aa Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 22 Jul 2008 15:33:57 -0400 Subject: x86: doc: boot.txt: fix the size of the start_sys field The start_sys field is two bytes, not four. Signed-off-by: H. Peter Anvin diff --git a/Documentation/x86/boot.txt b/Documentation/x86/boot.txt index 147bfe5..83c0033 100644 --- a/Documentation/x86/boot.txt +++ b/Documentation/x86/boot.txt @@ -308,7 +308,7 @@ Protocol: 2.00+ Field name: start_sys Type: read -Offset/size: 0x20c/4 +Offset/size: 0x20c/2 Protocol: 2.00+ The load low segment (0x1000). Obsolete. -- cgit v0.10.2 From a31863168660c6b6f6c7ffe05bb6a38e97803326 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Tue, 22 Jul 2008 21:53:53 +0200 Subject: x86: consolidate header guards This patch consolidates the header guard names which are also used externally, i.e. in .c files. Signed-off-by: Vegard Nossum diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index bc5553b..3ee338c 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -16,7 +16,7 @@ */ #undef CONFIG_PARAVIRT #ifdef CONFIG_X86_32 -#define _ASM_DESC_H_ 1 +#define ASM_X86__DESC_H 1 #endif #ifdef CONFIG_X86_64 diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index aa89387..505543a 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -22,7 +22,7 @@ #define __NO_STUBS 1 #undef __SYSCALL -#undef _ASM_X86_64_UNISTD_H_ +#undef ASM_X86__UNISTD_64_H #define __SYSCALL(nr, sym) [nr] = 1, static char syscalls[] = { #include diff --git a/arch/x86/kernel/syscall_64.c b/arch/x86/kernel/syscall_64.c index 170d43c..3d1be4f 100644 --- a/arch/x86/kernel/syscall_64.c +++ b/arch/x86/kernel/syscall_64.c @@ -8,12 +8,12 @@ #define __NO_STUBS #define __SYSCALL(nr, sym) extern asmlinkage void sym(void) ; -#undef _ASM_X86_64_UNISTD_H_ +#undef ASM_X86__UNISTD_64_H #include #undef __SYSCALL #define __SYSCALL(nr, sym) [nr] = sym, -#undef _ASM_X86_64_UNISTD_H_ +#undef ASM_X86__UNISTD_64_H typedef void (*sys_call_ptr_t)(void); diff --git a/include/asm-x86/desc.h b/include/asm-x86/desc.h index a44c4dc..b73fea5 100644 --- a/include/asm-x86/desc.h +++ b/include/asm-x86/desc.h @@ -1,5 +1,5 @@ -#ifndef _ASM_DESC_H_ -#define _ASM_DESC_H_ +#ifndef ASM_X86__DESC_H +#define ASM_X86__DESC_H #ifndef __ASSEMBLY__ #include @@ -397,4 +397,4 @@ static inline void set_system_gate_ist(int n, void *addr, unsigned ist) #endif /* __ASSEMBLY__ */ -#endif +#endif /* ASM_X86__DESC_H */ diff --git a/include/asm-x86/unistd_64.h b/include/asm-x86/unistd_64.h index 9c1a4a3..1e10189 100644 --- a/include/asm-x86/unistd_64.h +++ b/include/asm-x86/unistd_64.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_64_UNISTD_H_ -#define _ASM_X86_64_UNISTD_H_ +#ifndef ASM_X86__UNISTD_64_H +#define ASM_X86__UNISTD_64_H #ifndef __SYSCALL #define __SYSCALL(a, b) @@ -676,4 +676,4 @@ __SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime) #define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall") #endif /* __KERNEL__ */ -#endif /* _ASM_X86_64_UNISTD_H_ */ +#endif /* ASM_X86__UNISTD_64_H */ -- cgit v0.10.2 From 2b97df06ce44b1d145bd1299f50765803c2fabee Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Date: Wed, 23 Jul 2008 17:13:14 +0530 Subject: x86: apic_XX.c declare functions before they get used declared following smp interrupts in asm-x86/hw_irq.h: smp_apic_timer_interrupt, smp_spurious_interrupt, smp_error_interrupt Signed-off-by: Jaswinder Singh diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h index 133c998..519ad65 100644 --- a/include/asm-x86/apic.h +++ b/include/asm-x86/apic.h @@ -54,6 +54,11 @@ extern int disable_apic; #endif extern int is_vsmp_box(void); +extern void xapic_wait_icr_idle(void); +extern u32 safe_xapic_wait_icr_idle(void); +extern u64 xapic_icr_read(void); +extern void xapic_icr_write(u32, u32); +extern int setup_profiling_timer(unsigned int); static inline void native_apic_write(unsigned long reg, u32 v) { diff --git a/include/asm-x86/hw_irq.h b/include/asm-x86/hw_irq.h index 77ba51d..40b941a 100644 --- a/include/asm-x86/hw_irq.h +++ b/include/asm-x86/hw_irq.h @@ -93,6 +93,16 @@ extern asmlinkage void qic_reschedule_interrupt(void); extern asmlinkage void qic_enable_irq_interrupt(void); extern asmlinkage void qic_call_function_interrupt(void); +/* SMP */ +extern void smp_apic_timer_interrupt(struct pt_regs *); +#ifdef CONFIG_X86_32 +extern void smp_spurious_interrupt(struct pt_regs *); +extern void smp_error_interrupt(struct pt_regs *); +#else +extern asmlinkage void smp_spurious_interrupt(void); +extern asmlinkage void smp_error_interrupt(void); +#endif + #ifdef CONFIG_X86_32 extern void (*const interrupt[NR_IRQS])(void); #else -- cgit v0.10.2 From 8f7db5186cf126b56035d9a9735774d751090d66 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Date: Wed, 23 Jul 2008 17:31:02 +0530 Subject: x86: vm86_32.c declare functions before they get used declared following syscalls in asm-x86/syscalls.h: sys_vm86old, sys_vm86 Signed-off-by: Jaswinder Singh diff --git a/include/asm-x86/syscalls.h b/include/asm-x86/syscalls.h index 170fcb1..3d6b159 100644 --- a/include/asm-x86/syscalls.h +++ b/include/asm-x86/syscalls.h @@ -6,7 +6,6 @@ * This file is released under the GPLv2. * See the file COPYING for more details. * - * Please do not call me directly, include linux/syscalls.h */ #ifndef _ASM_X86_SYSCALLS_H @@ -60,6 +59,10 @@ asmlinkage int sys_olduname(struct oldold_utsname __user *); asmlinkage int sys_set_thread_area(struct user_desc __user *); asmlinkage int sys_get_thread_area(struct user_desc __user *); +/* kernel/vm86_32.c */ +asmlinkage int sys_vm86old(struct pt_regs); +asmlinkage int sys_vm86(struct pt_regs); + #else /* CONFIG_X86_32 */ /* X86_64 only */ -- cgit v0.10.2 From a80495ec927e8ec2b1ff085592bbe9bed77ffb3b Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Date: Wed, 23 Jul 2008 17:33:57 +0530 Subject: x86: mm/init_XX.c declare functions before they get used included in mm/init_32.c for zap_low_mappings() declared free_initmem() in asm-x86/page_XX.h Signed-off-by: Jaswinder Singh diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index d37f293..4974e97 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -47,6 +47,7 @@ #include #include #include +#include unsigned int __VMALLOC_RESERVE = 128 << 20; diff --git a/include/asm-x86/page_32.h b/include/asm-x86/page_32.h index ab85287..c9dd069 100644 --- a/include/asm-x86/page_32.h +++ b/include/asm-x86/page_32.h @@ -96,6 +96,7 @@ extern void find_low_pfn_range(void); extern unsigned long init_memory_mapping(unsigned long start, unsigned long end); extern void initmem_init(unsigned long, unsigned long); +extern void free_initmem(void); extern void setup_bootmem_allocator(void); diff --git a/include/asm-x86/page_64.h b/include/asm-x86/page_64.h index c6916c8..e5a754e 100644 --- a/include/asm-x86/page_64.h +++ b/include/asm-x86/page_64.h @@ -91,6 +91,7 @@ extern unsigned long init_memory_mapping(unsigned long start, unsigned long end); extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn); +extern void free_initmem(void); extern void init_extra_mapping_uc(unsigned long phys, unsigned long size); extern void init_extra_mapping_wb(unsigned long phys, unsigned long size); -- cgit v0.10.2 From 70ef56414ec7e01d787c8e959bb259845df4ee4f Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Date: Wed, 23 Jul 2008 17:36:37 +0530 Subject: x86: mm/fault.c declare do_page_fault before they get used declared do_page_fault() in asm-x86/trap.h for both X86_32 and X86_64 removed do_invalid_op declaration from mm/fault.c as it is already declared in asm-x86/trap.h Signed-off-by: Jaswinder Singh diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 455f3fe..8f92cac 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -35,6 +35,7 @@ #include #include #include +#include /* * Page fault error code bits @@ -357,8 +358,6 @@ static int is_errata100(struct pt_regs *regs, unsigned long address) return 0; } -void do_invalid_op(struct pt_regs *, unsigned long); - static int is_f00f_bug(struct pt_regs *regs, unsigned long address) { #ifdef CONFIG_X86_F00F_BUG diff --git a/include/asm-x86/traps.h b/include/asm-x86/traps.h index a4b65a7..b60fab5 100644 --- a/include/asm-x86/traps.h +++ b/include/asm-x86/traps.h @@ -51,6 +51,8 @@ void do_spurious_interrupt_bug(struct pt_regs *, long); unsigned long patch_espfix_desc(unsigned long, unsigned long); asmlinkage void math_emulate(long); +void do_page_fault(struct pt_regs *regs, unsigned long error_code); + #else /* CONFIG_X86_32 */ asmlinkage void double_fault(void); @@ -62,5 +64,7 @@ asmlinkage void do_coprocessor_error(struct pt_regs *); asmlinkage void do_simd_coprocessor_error(struct pt_regs *); asmlinkage void do_spurious_interrupt_bug(struct pt_regs *); +asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code); + #endif /* CONFIG_X86_32 */ #endif /* _ASM_X86_TRAPS_H */ -- cgit v0.10.2 From 4b6e9f27d0034740e9cfa341b45c229ba30ec0c5 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Date: Wed, 23 Jul 2008 17:39:16 +0530 Subject: x86: mm/ioremap.c declare early_ioremap_debug and early_ioremap_nested as static Signed-off-by: Jaswinder Singh diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 24c1d3c..19fd9a3 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -413,7 +413,7 @@ void unxlate_dev_mem_ptr(unsigned long phys, void *addr) return; } -int __initdata early_ioremap_debug; +static int __initdata early_ioremap_debug; static int __init early_ioremap_debug_setup(char *str) { @@ -539,7 +539,7 @@ static inline void __init early_clear_fixmap(enum fixed_addresses idx) } -int __initdata early_ioremap_nested; +static int __initdata early_ioremap_nested; static int __init check_early_ioremap_leak(void) { -- cgit v0.10.2 From e0b7c8192ded4c2096388008d3ca6708caa8b601 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Date: Wed, 23 Jul 2008 17:40:34 +0530 Subject: x86: mm/pageattr.c declare arch_report_meminfo before they get used declared arch_report_meminfo() in asm-x86/pgtable.h as it will be also accessible by fs/proc/proc_misc.c Signed-off-by: Jaswinder Singh diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h index 96aa76e..4fb22b9 100644 --- a/include/asm-x86/pgtable.h +++ b/include/asm-x86/pgtable.h @@ -310,6 +310,8 @@ static inline void native_pagetable_setup_start(pgd_t *base) {} static inline void native_pagetable_setup_done(pgd_t *base) {} #endif +extern int arch_report_meminfo(char *page); + #ifdef CONFIG_PARAVIRT #include #else /* !CONFIG_PARAVIRT */ -- cgit v0.10.2 From 01eb7858c017b1c63b962f8c2ad37133383ca560 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Date: Wed, 23 Jul 2008 17:41:59 +0530 Subject: x86: mm/pgtable_32.c declare set_pmd_pfn before they get used Signed-off-by: Jaswinder Singh diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h index 0611abf..015fb4e 100644 --- a/include/asm-x86/pgtable_32.h +++ b/include/asm-x86/pgtable_32.h @@ -31,6 +31,7 @@ static inline void pgtable_cache_init(void) { } static inline void check_pgt_cache(void) { } void paging_init(void); +extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t); /* * The Linux x86 paging architecture is 'compile-time dual-mode', it -- cgit v0.10.2 From 71e3b818431957371c7f69fa1c576d4a403c1478 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Date: Wed, 23 Jul 2008 17:44:00 +0530 Subject: x86: mach-default/setup.c declare no_broadcast before they get used included mach_ipi.h for no_broadcast declaration fixed minor spacing for no_broadcast Signed-off-by: Jaswinder Singh diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c index 3d31783..3f2cf11 100644 --- a/arch/x86/mach-default/setup.c +++ b/arch/x86/mach-default/setup.c @@ -10,13 +10,15 @@ #include #include +#include + #ifdef CONFIG_HOTPLUG_CPU #define DEFAULT_SEND_IPI (1) #else #define DEFAULT_SEND_IPI (0) #endif -int no_broadcast=DEFAULT_SEND_IPI; +int no_broadcast = DEFAULT_SEND_IPI; /** * pre_intr_init_hook - initialisation prior to setting up interrupt vectors -- cgit v0.10.2 From 05d3ed0a1fe3ea05ab9f3b8d32576a0bc2e19660 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Mon, 21 Jul 2008 10:15:22 -0400 Subject: x86, pci: iommu fix potential overflow in alloc_iommu() It is possible that alloc_iommu()'s boundary_size overflows as dma_get_seg_boundary can return 0xffffffff. In that case, further usage of boundary_size triggers a BUG_ON() in the iommu code. Signed-off-by: Prarit Bhargava Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index df5f142..1062dc1 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -93,7 +93,7 @@ static unsigned long alloc_iommu(struct device *dev, int size) base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev), PAGE_SIZE) >> PAGE_SHIFT; - boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, + boundary_size = ALIGN((unsigned long long)dma_get_seg_boundary(dev) + 1, PAGE_SIZE) >> PAGE_SHIFT; spin_lock_irqsave(&iommu_bitmap_lock, flags); -- cgit v0.10.2 From fd60b39f845aa7462453af8aed4f059b162f181f Mon Sep 17 00:00:00 2001 From: Huang Weiyi Date: Wed, 23 Jul 2008 22:45:01 +0800 Subject: arch/x86/kernel/genx2apic_uv_x.c: Removed duplicated include Removed duplicated include file in arch/x86/kernel/genx2apic_uv_x.c. Signed-off-by: Huang Weiyi Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index a882bc3..14a97727 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include -- cgit v0.10.2 From a300bec952127d9a15e666b391bb35c9aecb3002 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 22 Jul 2008 09:46:13 -0700 Subject: documentation: move hpet.txt to timers/ subdirectory Move hpet.txt to Documentation/timers/ subdirectory. Add 00-INDEX to Documentation/timers/ subdirectory. Signed-off-by: Randy Dunlap Cc: tglx Signed-off-by: Thomas Gleixner diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX index 1977fab..6c40c4c 100644 --- a/Documentation/00-INDEX +++ b/Documentation/00-INDEX @@ -161,8 +161,6 @@ hayes-esp.txt - info on using the Hayes ESP serial driver. highuid.txt - notes on the change from 16 bit to 32 bit user/group IDs. -hpet.txt - - High Precision Event Timer Driver for Linux. timers/ - info on the timer related topics hw_random.txt diff --git a/Documentation/hpet.txt b/Documentation/hpet.txt deleted file mode 100644 index 6ad52d9..0000000 --- a/Documentation/hpet.txt +++ /dev/null @@ -1,300 +0,0 @@ - High Precision Event Timer Driver for Linux - -The High Precision Event Timer (HPET) hardware is the future replacement -for the 8254 and Real Time Clock (RTC) periodic timer functionality. -Each HPET can have up to 32 timers. It is possible to configure the -first two timers as legacy replacements for 8254 and RTC periodic timers. -A specification done by Intel and Microsoft can be found at -. - -The driver supports detection of HPET driver allocation and initialization -of the HPET before the driver module_init routine is called. This enables -platform code which uses timer 0 or 1 as the main timer to intercept HPET -initialization. An example of this initialization can be found in -arch/i386/kernel/time_hpet.c. - -The driver provides two APIs which are very similar to the API found in -the rtc.c driver. There is a user space API and a kernel space API. -An example user space program is provided below. - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -extern void hpet_open_close(int, const char **); -extern void hpet_info(int, const char **); -extern void hpet_poll(int, const char **); -extern void hpet_fasync(int, const char **); -extern void hpet_read(int, const char **); - -#include -#include -#include - -struct hpet_command { - char *command; - void (*func)(int argc, const char ** argv); -} hpet_command[] = { - { - "open-close", - hpet_open_close - }, - { - "info", - hpet_info - }, - { - "poll", - hpet_poll - }, - { - "fasync", - hpet_fasync - }, -}; - -int -main(int argc, const char ** argv) -{ - int i; - - argc--; - argv++; - - if (!argc) { - fprintf(stderr, "-hpet: requires command\n"); - return -1; - } - - - for (i = 0; i < (sizeof (hpet_command) / sizeof (hpet_command[0])); i++) - if (!strcmp(argv[0], hpet_command[i].command)) { - argc--; - argv++; - fprintf(stderr, "-hpet: executing %s\n", - hpet_command[i].command); - hpet_command[i].func(argc, argv); - return 0; - } - - fprintf(stderr, "do_hpet: command %s not implemented\n", argv[0]); - - return -1; -} - -void -hpet_open_close(int argc, const char **argv) -{ - int fd; - - if (argc != 1) { - fprintf(stderr, "hpet_open_close: device-name\n"); - return; - } - - fd = open(argv[0], O_RDONLY); - if (fd < 0) - fprintf(stderr, "hpet_open_close: open failed\n"); - else - close(fd); - - return; -} - -void -hpet_info(int argc, const char **argv) -{ -} - -void -hpet_poll(int argc, const char **argv) -{ - unsigned long freq; - int iterations, i, fd; - struct pollfd pfd; - struct hpet_info info; - struct timeval stv, etv; - struct timezone tz; - long usec; - - if (argc != 3) { - fprintf(stderr, "hpet_poll: device-name freq iterations\n"); - return; - } - - freq = atoi(argv[1]); - iterations = atoi(argv[2]); - - fd = open(argv[0], O_RDONLY); - - if (fd < 0) { - fprintf(stderr, "hpet_poll: open of %s failed\n", argv[0]); - return; - } - - if (ioctl(fd, HPET_IRQFREQ, freq) < 0) { - fprintf(stderr, "hpet_poll: HPET_IRQFREQ failed\n"); - goto out; - } - - if (ioctl(fd, HPET_INFO, &info) < 0) { - fprintf(stderr, "hpet_poll: failed to get info\n"); - goto out; - } - - fprintf(stderr, "hpet_poll: info.hi_flags 0x%lx\n", info.hi_flags); - - if (info.hi_flags && (ioctl(fd, HPET_EPI, 0) < 0)) { - fprintf(stderr, "hpet_poll: HPET_EPI failed\n"); - goto out; - } - - if (ioctl(fd, HPET_IE_ON, 0) < 0) { - fprintf(stderr, "hpet_poll, HPET_IE_ON failed\n"); - goto out; - } - - pfd.fd = fd; - pfd.events = POLLIN; - - for (i = 0; i < iterations; i++) { - pfd.revents = 0; - gettimeofday(&stv, &tz); - if (poll(&pfd, 1, -1) < 0) - fprintf(stderr, "hpet_poll: poll failed\n"); - else { - long data; - - gettimeofday(&etv, &tz); - usec = stv.tv_sec * 1000000 + stv.tv_usec; - usec = (etv.tv_sec * 1000000 + etv.tv_usec) - usec; - - fprintf(stderr, - "hpet_poll: expired time = 0x%lx\n", usec); - - fprintf(stderr, "hpet_poll: revents = 0x%x\n", - pfd.revents); - - if (read(fd, &data, sizeof(data)) != sizeof(data)) { - fprintf(stderr, "hpet_poll: read failed\n"); - } - else - fprintf(stderr, "hpet_poll: data 0x%lx\n", - data); - } - } - -out: - close(fd); - return; -} - -static int hpet_sigio_count; - -static void -hpet_sigio(int val) -{ - fprintf(stderr, "hpet_sigio: called\n"); - hpet_sigio_count++; -} - -void -hpet_fasync(int argc, const char **argv) -{ - unsigned long freq; - int iterations, i, fd, value; - sig_t oldsig; - struct hpet_info info; - - hpet_sigio_count = 0; - fd = -1; - - if ((oldsig = signal(SIGIO, hpet_sigio)) == SIG_ERR) { - fprintf(stderr, "hpet_fasync: failed to set signal handler\n"); - return; - } - - if (argc != 3) { - fprintf(stderr, "hpet_fasync: device-name freq iterations\n"); - goto out; - } - - fd = open(argv[0], O_RDONLY); - - if (fd < 0) { - fprintf(stderr, "hpet_fasync: failed to open %s\n", argv[0]); - return; - } - - - if ((fcntl(fd, F_SETOWN, getpid()) == 1) || - ((value = fcntl(fd, F_GETFL)) == 1) || - (fcntl(fd, F_SETFL, value | O_ASYNC) == 1)) { - fprintf(stderr, "hpet_fasync: fcntl failed\n"); - goto out; - } - - freq = atoi(argv[1]); - iterations = atoi(argv[2]); - - if (ioctl(fd, HPET_IRQFREQ, freq) < 0) { - fprintf(stderr, "hpet_fasync: HPET_IRQFREQ failed\n"); - goto out; - } - - if (ioctl(fd, HPET_INFO, &info) < 0) { - fprintf(stderr, "hpet_fasync: failed to get info\n"); - goto out; - } - - fprintf(stderr, "hpet_fasync: info.hi_flags 0x%lx\n", info.hi_flags); - - if (info.hi_flags && (ioctl(fd, HPET_EPI, 0) < 0)) { - fprintf(stderr, "hpet_fasync: HPET_EPI failed\n"); - goto out; - } - - if (ioctl(fd, HPET_IE_ON, 0) < 0) { - fprintf(stderr, "hpet_fasync, HPET_IE_ON failed\n"); - goto out; - } - - for (i = 0; i < iterations; i++) { - (void) pause(); - fprintf(stderr, "hpet_fasync: count = %d\n", hpet_sigio_count); - } - -out: - signal(SIGIO, oldsig); - - if (fd >= 0) - close(fd); - - return; -} - -The kernel API has three interfaces exported from the driver: - - hpet_register(struct hpet_task *tp, int periodic) - hpet_unregister(struct hpet_task *tp) - hpet_control(struct hpet_task *tp, unsigned int cmd, unsigned long arg) - -The kernel module using this interface fills in the ht_func and ht_data -members of the hpet_task structure before calling hpet_register. -hpet_control simply vectors to the hpet_ioctl routine and has the same -commands and respective arguments as the user API. hpet_unregister -is used to terminate usage of the HPET timer reserved by hpet_register. diff --git a/Documentation/timers/00-INDEX b/Documentation/timers/00-INDEX new file mode 100644 index 0000000..397dc35 --- /dev/null +++ b/Documentation/timers/00-INDEX @@ -0,0 +1,10 @@ +00-INDEX + - this file +highres.txt + - High resolution timers and dynamic ticks design notes +hpet.txt + - High Precision Event Timer Driver for Linux +hrtimers.txt + - subsystem for high-resolution kernel timers +timer_stats.txt + - timer usage statistics diff --git a/Documentation/timers/hpet.txt b/Documentation/timers/hpet.txt new file mode 100644 index 0000000..6ad52d9 --- /dev/null +++ b/Documentation/timers/hpet.txt @@ -0,0 +1,300 @@ + High Precision Event Timer Driver for Linux + +The High Precision Event Timer (HPET) hardware is the future replacement +for the 8254 and Real Time Clock (RTC) periodic timer functionality. +Each HPET can have up to 32 timers. It is possible to configure the +first two timers as legacy replacements for 8254 and RTC periodic timers. +A specification done by Intel and Microsoft can be found at +. + +The driver supports detection of HPET driver allocation and initialization +of the HPET before the driver module_init routine is called. This enables +platform code which uses timer 0 or 1 as the main timer to intercept HPET +initialization. An example of this initialization can be found in +arch/i386/kernel/time_hpet.c. + +The driver provides two APIs which are very similar to the API found in +the rtc.c driver. There is a user space API and a kernel space API. +An example user space program is provided below. + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +extern void hpet_open_close(int, const char **); +extern void hpet_info(int, const char **); +extern void hpet_poll(int, const char **); +extern void hpet_fasync(int, const char **); +extern void hpet_read(int, const char **); + +#include +#include +#include + +struct hpet_command { + char *command; + void (*func)(int argc, const char ** argv); +} hpet_command[] = { + { + "open-close", + hpet_open_close + }, + { + "info", + hpet_info + }, + { + "poll", + hpet_poll + }, + { + "fasync", + hpet_fasync + }, +}; + +int +main(int argc, const char ** argv) +{ + int i; + + argc--; + argv++; + + if (!argc) { + fprintf(stderr, "-hpet: requires command\n"); + return -1; + } + + + for (i = 0; i < (sizeof (hpet_command) / sizeof (hpet_command[0])); i++) + if (!strcmp(argv[0], hpet_command[i].command)) { + argc--; + argv++; + fprintf(stderr, "-hpet: executing %s\n", + hpet_command[i].command); + hpet_command[i].func(argc, argv); + return 0; + } + + fprintf(stderr, "do_hpet: command %s not implemented\n", argv[0]); + + return -1; +} + +void +hpet_open_close(int argc, const char **argv) +{ + int fd; + + if (argc != 1) { + fprintf(stderr, "hpet_open_close: device-name\n"); + return; + } + + fd = open(argv[0], O_RDONLY); + if (fd < 0) + fprintf(stderr, "hpet_open_close: open failed\n"); + else + close(fd); + + return; +} + +void +hpet_info(int argc, const char **argv) +{ +} + +void +hpet_poll(int argc, const char **argv) +{ + unsigned long freq; + int iterations, i, fd; + struct pollfd pfd; + struct hpet_info info; + struct timeval stv, etv; + struct timezone tz; + long usec; + + if (argc != 3) { + fprintf(stderr, "hpet_poll: device-name freq iterations\n"); + return; + } + + freq = atoi(argv[1]); + iterations = atoi(argv[2]); + + fd = open(argv[0], O_RDONLY); + + if (fd < 0) { + fprintf(stderr, "hpet_poll: open of %s failed\n", argv[0]); + return; + } + + if (ioctl(fd, HPET_IRQFREQ, freq) < 0) { + fprintf(stderr, "hpet_poll: HPET_IRQFREQ failed\n"); + goto out; + } + + if (ioctl(fd, HPET_INFO, &info) < 0) { + fprintf(stderr, "hpet_poll: failed to get info\n"); + goto out; + } + + fprintf(stderr, "hpet_poll: info.hi_flags 0x%lx\n", info.hi_flags); + + if (info.hi_flags && (ioctl(fd, HPET_EPI, 0) < 0)) { + fprintf(stderr, "hpet_poll: HPET_EPI failed\n"); + goto out; + } + + if (ioctl(fd, HPET_IE_ON, 0) < 0) { + fprintf(stderr, "hpet_poll, HPET_IE_ON failed\n"); + goto out; + } + + pfd.fd = fd; + pfd.events = POLLIN; + + for (i = 0; i < iterations; i++) { + pfd.revents = 0; + gettimeofday(&stv, &tz); + if (poll(&pfd, 1, -1) < 0) + fprintf(stderr, "hpet_poll: poll failed\n"); + else { + long data; + + gettimeofday(&etv, &tz); + usec = stv.tv_sec * 1000000 + stv.tv_usec; + usec = (etv.tv_sec * 1000000 + etv.tv_usec) - usec; + + fprintf(stderr, + "hpet_poll: expired time = 0x%lx\n", usec); + + fprintf(stderr, "hpet_poll: revents = 0x%x\n", + pfd.revents); + + if (read(fd, &data, sizeof(data)) != sizeof(data)) { + fprintf(stderr, "hpet_poll: read failed\n"); + } + else + fprintf(stderr, "hpet_poll: data 0x%lx\n", + data); + } + } + +out: + close(fd); + return; +} + +static int hpet_sigio_count; + +static void +hpet_sigio(int val) +{ + fprintf(stderr, "hpet_sigio: called\n"); + hpet_sigio_count++; +} + +void +hpet_fasync(int argc, const char **argv) +{ + unsigned long freq; + int iterations, i, fd, value; + sig_t oldsig; + struct hpet_info info; + + hpet_sigio_count = 0; + fd = -1; + + if ((oldsig = signal(SIGIO, hpet_sigio)) == SIG_ERR) { + fprintf(stderr, "hpet_fasync: failed to set signal handler\n"); + return; + } + + if (argc != 3) { + fprintf(stderr, "hpet_fasync: device-name freq iterations\n"); + goto out; + } + + fd = open(argv[0], O_RDONLY); + + if (fd < 0) { + fprintf(stderr, "hpet_fasync: failed to open %s\n", argv[0]); + return; + } + + + if ((fcntl(fd, F_SETOWN, getpid()) == 1) || + ((value = fcntl(fd, F_GETFL)) == 1) || + (fcntl(fd, F_SETFL, value | O_ASYNC) == 1)) { + fprintf(stderr, "hpet_fasync: fcntl failed\n"); + goto out; + } + + freq = atoi(argv[1]); + iterations = atoi(argv[2]); + + if (ioctl(fd, HPET_IRQFREQ, freq) < 0) { + fprintf(stderr, "hpet_fasync: HPET_IRQFREQ failed\n"); + goto out; + } + + if (ioctl(fd, HPET_INFO, &info) < 0) { + fprintf(stderr, "hpet_fasync: failed to get info\n"); + goto out; + } + + fprintf(stderr, "hpet_fasync: info.hi_flags 0x%lx\n", info.hi_flags); + + if (info.hi_flags && (ioctl(fd, HPET_EPI, 0) < 0)) { + fprintf(stderr, "hpet_fasync: HPET_EPI failed\n"); + goto out; + } + + if (ioctl(fd, HPET_IE_ON, 0) < 0) { + fprintf(stderr, "hpet_fasync, HPET_IE_ON failed\n"); + goto out; + } + + for (i = 0; i < iterations; i++) { + (void) pause(); + fprintf(stderr, "hpet_fasync: count = %d\n", hpet_sigio_count); + } + +out: + signal(SIGIO, oldsig); + + if (fd >= 0) + close(fd); + + return; +} + +The kernel API has three interfaces exported from the driver: + + hpet_register(struct hpet_task *tp, int periodic) + hpet_unregister(struct hpet_task *tp) + hpet_control(struct hpet_task *tp, unsigned int cmd, unsigned long arg) + +The kernel module using this interface fills in the ht_func and ht_data +members of the hpet_task structure before calling hpet_register. +hpet_control simply vectors to the hpet_ioctl routine and has the same +commands and respective arguments as the user API. hpet_unregister +is used to terminate usage of the HPET timer reserved by hpet_register. -- cgit v0.10.2 From 38ffbe66d59051fd9cfcfc8545f164700e2fa3bc Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 23 Jul 2008 14:21:18 -0700 Subject: x86/paravirt/xen: properly fill out the ldt ops LTP testing showed that Xen does not properly implement sys_modify_ldt(). This patch does the final little bits needed to make the ldt work properly. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index 3fee2aa..4895e06 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -51,6 +51,8 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) memset(newldt + oldsize * LDT_ENTRY_SIZE, 0, (mincount - oldsize) * LDT_ENTRY_SIZE); + paravirt_alloc_ldt(newldt, mincount); + #ifdef CONFIG_X86_64 /* CHECKME: Do we really need this ? */ wmb(); @@ -75,6 +77,7 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) #endif } if (oldsize) { + paravirt_free_ldt(oldldt, oldsize); if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE) vfree(oldldt); else @@ -86,10 +89,13 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) static inline int copy_ldt(mm_context_t *new, mm_context_t *old) { int err = alloc_ldt(new, old->size, 0); + int i; if (err < 0) return err; - memcpy(new->ldt, old->ldt, old->size * LDT_ENTRY_SIZE); + + for(i = 0; i < old->size; i++) + write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE); return 0; } @@ -126,6 +132,7 @@ void destroy_context(struct mm_struct *mm) if (mm == current->active_mm) clear_LDT(); #endif + paravirt_free_ldt(mm->context.ldt, mm->context.size); if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE) vfree(mm->context.ldt); else diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 94da4d52..d8f2277 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -348,6 +348,10 @@ struct pv_cpu_ops pv_cpu_ops = { .write_ldt_entry = native_write_ldt_entry, .write_gdt_entry = native_write_gdt_entry, .write_idt_entry = native_write_idt_entry, + + .alloc_ldt = paravirt_nop, + .free_ldt = paravirt_nop, + .load_sp0 = native_load_sp0, #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 9ff6e3c..06219e6 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -325,6 +325,26 @@ static unsigned long xen_store_tr(void) return 0; } +static void xen_alloc_ldt(struct desc_struct *ldt, unsigned entries) +{ + unsigned pages = roundup(entries * LDT_ENTRY_SIZE, PAGE_SIZE); + void *v = ldt; + int i; + + for(i = 0; i < pages; i += PAGE_SIZE) + make_lowmem_page_readonly(v + i); +} + +static void xen_free_ldt(struct desc_struct *ldt, unsigned entries) +{ + unsigned pages = roundup(entries * LDT_ENTRY_SIZE, PAGE_SIZE); + void *v = ldt; + int i; + + for(i = 0; i < pages; i += PAGE_SIZE) + make_lowmem_page_readwrite(v + i); +} + static void xen_set_ldt(const void *addr, unsigned entries) { struct mmuext_op *op; @@ -1220,6 +1240,9 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { .load_gs_index = xen_load_gs_index, #endif + .alloc_ldt = xen_alloc_ldt, + .free_ldt = xen_free_ldt, + .store_gdt = native_store_gdt, .store_idt = native_store_idt, .store_tr = xen_store_tr, diff --git a/include/asm-x86/desc.h b/include/asm-x86/desc.h index a44c4dc..24a524f 100644 --- a/include/asm-x86/desc.h +++ b/include/asm-x86/desc.h @@ -97,7 +97,15 @@ static inline int desc_empty(const void *ptr) native_write_gdt_entry(dt, entry, desc, type) #define write_idt_entry(dt, entry, g) \ native_write_idt_entry(dt, entry, g) -#endif + +static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries) +{ +} + +static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries) +{ +} +#endif /* CONFIG_PARAVIRT */ static inline void native_write_idt_entry(gate_desc *idt, int entry, const gate_desc *gate) diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h index fbbde93..db9b064 100644 --- a/include/asm-x86/paravirt.h +++ b/include/asm-x86/paravirt.h @@ -124,6 +124,9 @@ struct pv_cpu_ops { int entrynum, const void *desc, int size); void (*write_idt_entry)(gate_desc *, int entrynum, const gate_desc *gate); + void (*alloc_ldt)(struct desc_struct *ldt, unsigned entries); + void (*free_ldt)(struct desc_struct *ldt, unsigned entries); + void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t); void (*set_iopl_mask)(unsigned mask); @@ -824,6 +827,16 @@ do { \ (aux) = __aux; \ } while (0) +static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries) +{ + PVOP_VCALL2(pv_cpu_ops.alloc_ldt, ldt, entries); +} + +static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries) +{ + PVOP_VCALL2(pv_cpu_ops.free_ldt, ldt, entries); +} + static inline void load_TR_desc(void) { PVOP_VCALL0(pv_cpu_ops.load_tr_desc); -- cgit v0.10.2 From d5de8841355a48f7f634a04507185eaf1f9755e3 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 23 Jul 2008 13:28:58 -0700 Subject: x86: split spinlock implementations out into their own files ftrace requires certain low-level code, like spinlocks and timestamps, to be compiled without -pg in order to avoid infinite recursion. This patch splits out the core paravirt spinlocks and the Xen spinlocks into separate files which can be compiled without -pg. Also do xen/time.c while we're about it. As a result, we can now use ftrace within a Xen domain. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 3db651f..d679cb2 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -10,7 +10,7 @@ ifdef CONFIG_FTRACE # Do not profile debug and lowlevel utilities CFLAGS_REMOVE_tsc.o = -pg CFLAGS_REMOVE_rtc.o = -pg -CFLAGS_REMOVE_paravirt.o = -pg +CFLAGS_REMOVE_paravirt-spinlocks.o = -pg endif # @@ -89,7 +89,7 @@ obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o obj-$(CONFIG_KVM_GUEST) += kvm.o obj-$(CONFIG_KVM_CLOCK) += kvmclock.o -obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o +obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o paravirt-spinlocks.o obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c new file mode 100644 index 0000000..38d7f7f --- /dev/null +++ b/arch/x86/kernel/paravirt-spinlocks.c @@ -0,0 +1,31 @@ +/* + * Split spinlock implementation out into its own file, so it can be + * compiled in a FTRACE-compatible way. + */ +#include +#include + +#include + +struct pv_lock_ops pv_lock_ops = { +#ifdef CONFIG_SMP + .spin_is_locked = __ticket_spin_is_locked, + .spin_is_contended = __ticket_spin_is_contended, + + .spin_lock = __ticket_spin_lock, + .spin_trylock = __ticket_spin_trylock, + .spin_unlock = __ticket_spin_unlock, +#endif +}; +EXPORT_SYMBOL_GPL(pv_lock_ops); + +void __init paravirt_use_bytelocks(void) +{ +#ifdef CONFIG_SMP + pv_lock_ops.spin_is_locked = __byte_spin_is_locked; + pv_lock_ops.spin_is_contended = __byte_spin_is_contended; + pv_lock_ops.spin_lock = __byte_spin_lock; + pv_lock_ops.spin_trylock = __byte_spin_trylock; + pv_lock_ops.spin_unlock = __byte_spin_unlock; +#endif +} diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 94da4d52..0d71de9 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -268,17 +268,6 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void) return __get_cpu_var(paravirt_lazy_mode); } -void __init paravirt_use_bytelocks(void) -{ -#ifdef CONFIG_SMP - pv_lock_ops.spin_is_locked = __byte_spin_is_locked; - pv_lock_ops.spin_is_contended = __byte_spin_is_contended; - pv_lock_ops.spin_lock = __byte_spin_lock; - pv_lock_ops.spin_trylock = __byte_spin_trylock; - pv_lock_ops.spin_unlock = __byte_spin_unlock; -#endif -} - struct pv_info pv_info = { .name = "bare hardware", .paravirt_enabled = 0, @@ -461,18 +450,6 @@ struct pv_mmu_ops pv_mmu_ops = { .set_fixmap = native_set_fixmap, }; -struct pv_lock_ops pv_lock_ops = { -#ifdef CONFIG_SMP - .spin_is_locked = __ticket_spin_is_locked, - .spin_is_contended = __ticket_spin_is_contended, - - .spin_lock = __ticket_spin_lock, - .spin_trylock = __ticket_spin_trylock, - .spin_unlock = __ticket_spin_unlock, -#endif -}; -EXPORT_SYMBOL_GPL(pv_lock_ops); - EXPORT_SYMBOL_GPL(pv_time_ops); EXPORT_SYMBOL (pv_cpu_ops); EXPORT_SYMBOL (pv_mmu_ops); diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 59c1e53..5bfee24 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile @@ -1,4 +1,10 @@ +ifdef CONFIG_FTRACE +# Do not profile debug and lowlevel utilities +CFLAGS_REMOVE_spinlock.o = -pg +CFLAGS_REMOVE_time.o = -pg +endif + obj-y := enlighten.o setup.o multicalls.o mmu.o \ time.o xen-asm_$(BITS).o grant-table.o suspend.o -obj-$(CONFIG_SMP) += smp.o +obj-$(CONFIG_SMP) += smp.o spinlock.o diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index d8faf79..baca7f2 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -15,7 +15,6 @@ * This does not handle HOTPLUG_CPU yet. */ #include -#include #include #include @@ -36,8 +35,6 @@ #include "xen-ops.h" #include "mmu.h" -static void __cpuinit xen_init_lock_cpu(int cpu); - cpumask_t xen_cpu_initialized_map; static DEFINE_PER_CPU(int, resched_irq); @@ -419,170 +416,6 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -struct xen_spinlock { - unsigned char lock; /* 0 -> free; 1 -> locked */ - unsigned short spinners; /* count of waiting cpus */ -}; - -static int xen_spin_is_locked(struct raw_spinlock *lock) -{ - struct xen_spinlock *xl = (struct xen_spinlock *)lock; - - return xl->lock != 0; -} - -static int xen_spin_is_contended(struct raw_spinlock *lock) -{ - struct xen_spinlock *xl = (struct xen_spinlock *)lock; - - /* Not strictly true; this is only the count of contended - lock-takers entering the slow path. */ - return xl->spinners != 0; -} - -static int xen_spin_trylock(struct raw_spinlock *lock) -{ - struct xen_spinlock *xl = (struct xen_spinlock *)lock; - u8 old = 1; - - asm("xchgb %b0,%1" - : "+q" (old), "+m" (xl->lock) : : "memory"); - - return old == 0; -} - -static DEFINE_PER_CPU(int, lock_kicker_irq) = -1; -static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners); - -static inline void spinning_lock(struct xen_spinlock *xl) -{ - __get_cpu_var(lock_spinners) = xl; - wmb(); /* set lock of interest before count */ - asm(LOCK_PREFIX " incw %0" - : "+m" (xl->spinners) : : "memory"); -} - -static inline void unspinning_lock(struct xen_spinlock *xl) -{ - asm(LOCK_PREFIX " decw %0" - : "+m" (xl->spinners) : : "memory"); - wmb(); /* decrement count before clearing lock */ - __get_cpu_var(lock_spinners) = NULL; -} - -static noinline int xen_spin_lock_slow(struct raw_spinlock *lock) -{ - struct xen_spinlock *xl = (struct xen_spinlock *)lock; - int irq = __get_cpu_var(lock_kicker_irq); - int ret; - - /* If kicker interrupts not initialized yet, just spin */ - if (irq == -1) - return 0; - - /* announce we're spinning */ - spinning_lock(xl); - - /* clear pending */ - xen_clear_irq_pending(irq); - - /* check again make sure it didn't become free while - we weren't looking */ - ret = xen_spin_trylock(lock); - if (ret) - goto out; - - /* block until irq becomes pending */ - xen_poll_irq(irq); - kstat_this_cpu.irqs[irq]++; - -out: - unspinning_lock(xl); - return ret; -} - -static void xen_spin_lock(struct raw_spinlock *lock) -{ - struct xen_spinlock *xl = (struct xen_spinlock *)lock; - int timeout; - u8 oldval; - - do { - timeout = 1 << 10; - - asm("1: xchgb %1,%0\n" - " testb %1,%1\n" - " jz 3f\n" - "2: rep;nop\n" - " cmpb $0,%0\n" - " je 1b\n" - " dec %2\n" - " jnz 2b\n" - "3:\n" - : "+m" (xl->lock), "=q" (oldval), "+r" (timeout) - : "1" (1) - : "memory"); - - } while (unlikely(oldval != 0 && !xen_spin_lock_slow(lock))); -} - -static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl) -{ - int cpu; - - for_each_online_cpu(cpu) { - /* XXX should mix up next cpu selection */ - if (per_cpu(lock_spinners, cpu) == xl) { - xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR); - break; - } - } -} - -static void xen_spin_unlock(struct raw_spinlock *lock) -{ - struct xen_spinlock *xl = (struct xen_spinlock *)lock; - - smp_wmb(); /* make sure no writes get moved after unlock */ - xl->lock = 0; /* release lock */ - - /* make sure unlock happens before kick */ - barrier(); - - if (unlikely(xl->spinners)) - xen_spin_unlock_slow(xl); -} - -static __cpuinit void xen_init_lock_cpu(int cpu) -{ - int irq; - const char *name; - - name = kasprintf(GFP_KERNEL, "spinlock%d", cpu); - irq = bind_ipi_to_irqhandler(XEN_SPIN_UNLOCK_VECTOR, - cpu, - xen_reschedule_interrupt, - IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING, - name, - NULL); - - if (irq >= 0) { - disable_irq(irq); /* make sure it's never delivered */ - per_cpu(lock_kicker_irq, cpu) = irq; - } - - printk("cpu %d spinlock event irq %d\n", cpu, irq); -} - -static void __init xen_init_spinlocks(void) -{ - pv_lock_ops.spin_is_locked = xen_spin_is_locked; - pv_lock_ops.spin_is_contended = xen_spin_is_contended; - pv_lock_ops.spin_lock = xen_spin_lock; - pv_lock_ops.spin_trylock = xen_spin_trylock; - pv_lock_ops.spin_unlock = xen_spin_unlock; -} - static const struct smp_ops xen_smp_ops __initdata = { .smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu, .smp_prepare_cpus = xen_smp_prepare_cpus, diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c new file mode 100644 index 0000000..8dc4d31 --- /dev/null +++ b/arch/x86/xen/spinlock.c @@ -0,0 +1,183 @@ +/* + * Split spinlock implementation out into its own file, so it can be + * compiled in a FTRACE-compatible way. + */ +#include +#include + +#include + +#include +#include + +#include "xen-ops.h" + +struct xen_spinlock { + unsigned char lock; /* 0 -> free; 1 -> locked */ + unsigned short spinners; /* count of waiting cpus */ +}; + +static int xen_spin_is_locked(struct raw_spinlock *lock) +{ + struct xen_spinlock *xl = (struct xen_spinlock *)lock; + + return xl->lock != 0; +} + +static int xen_spin_is_contended(struct raw_spinlock *lock) +{ + struct xen_spinlock *xl = (struct xen_spinlock *)lock; + + /* Not strictly true; this is only the count of contended + lock-takers entering the slow path. */ + return xl->spinners != 0; +} + +static int xen_spin_trylock(struct raw_spinlock *lock) +{ + struct xen_spinlock *xl = (struct xen_spinlock *)lock; + u8 old = 1; + + asm("xchgb %b0,%1" + : "+q" (old), "+m" (xl->lock) : : "memory"); + + return old == 0; +} + +static DEFINE_PER_CPU(int, lock_kicker_irq) = -1; +static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners); + +static inline void spinning_lock(struct xen_spinlock *xl) +{ + __get_cpu_var(lock_spinners) = xl; + wmb(); /* set lock of interest before count */ + asm(LOCK_PREFIX " incw %0" + : "+m" (xl->spinners) : : "memory"); +} + +static inline void unspinning_lock(struct xen_spinlock *xl) +{ + asm(LOCK_PREFIX " decw %0" + : "+m" (xl->spinners) : : "memory"); + wmb(); /* decrement count before clearing lock */ + __get_cpu_var(lock_spinners) = NULL; +} + +static noinline int xen_spin_lock_slow(struct raw_spinlock *lock) +{ + struct xen_spinlock *xl = (struct xen_spinlock *)lock; + int irq = __get_cpu_var(lock_kicker_irq); + int ret; + + /* If kicker interrupts not initialized yet, just spin */ + if (irq == -1) + return 0; + + /* announce we're spinning */ + spinning_lock(xl); + + /* clear pending */ + xen_clear_irq_pending(irq); + + /* check again make sure it didn't become free while + we weren't looking */ + ret = xen_spin_trylock(lock); + if (ret) + goto out; + + /* block until irq becomes pending */ + xen_poll_irq(irq); + kstat_this_cpu.irqs[irq]++; + +out: + unspinning_lock(xl); + return ret; +} + +static void xen_spin_lock(struct raw_spinlock *lock) +{ + struct xen_spinlock *xl = (struct xen_spinlock *)lock; + int timeout; + u8 oldval; + + do { + timeout = 1 << 10; + + asm("1: xchgb %1,%0\n" + " testb %1,%1\n" + " jz 3f\n" + "2: rep;nop\n" + " cmpb $0,%0\n" + " je 1b\n" + " dec %2\n" + " jnz 2b\n" + "3:\n" + : "+m" (xl->lock), "=q" (oldval), "+r" (timeout) + : "1" (1) + : "memory"); + + } while (unlikely(oldval != 0 && !xen_spin_lock_slow(lock))); +} + +static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl) +{ + int cpu; + + for_each_online_cpu(cpu) { + /* XXX should mix up next cpu selection */ + if (per_cpu(lock_spinners, cpu) == xl) { + xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR); + break; + } + } +} + +static void xen_spin_unlock(struct raw_spinlock *lock) +{ + struct xen_spinlock *xl = (struct xen_spinlock *)lock; + + smp_wmb(); /* make sure no writes get moved after unlock */ + xl->lock = 0; /* release lock */ + + /* make sure unlock happens before kick */ + barrier(); + + if (unlikely(xl->spinners)) + xen_spin_unlock_slow(xl); +} + +static irqreturn_t dummy_handler(int irq, void *dev_id) +{ + BUG(); + return IRQ_HANDLED; +} + +void __cpuinit xen_init_lock_cpu(int cpu) +{ + int irq; + const char *name; + + name = kasprintf(GFP_KERNEL, "spinlock%d", cpu); + irq = bind_ipi_to_irqhandler(XEN_SPIN_UNLOCK_VECTOR, + cpu, + dummy_handler, + IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING, + name, + NULL); + + if (irq >= 0) { + disable_irq(irq); /* make sure it's never delivered */ + per_cpu(lock_kicker_irq, cpu) = irq; + } + + printk("cpu %d spinlock event irq %d\n", cpu, irq); +} + +void __init xen_init_spinlocks(void) +{ + pv_lock_ops.spin_is_locked = xen_spin_is_locked; + pv_lock_ops.spin_is_contended = xen_spin_is_contended; + pv_lock_ops.spin_lock = xen_spin_lock; + pv_lock_ops.spin_trylock = xen_spin_trylock; + pv_lock_ops.spin_unlock = xen_spin_unlock; +} diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index dd3c231..8847fb3 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -50,6 +50,9 @@ void __init xen_setup_vcpu_info_placement(void); #ifdef CONFIG_SMP void xen_smp_init(void); +void __init xen_init_spinlocks(void); +__cpuinit void xen_init_lock_cpu(int cpu); + extern cpumask_t xen_cpu_initialized_map; #else static inline void xen_smp_init(void) {} -- cgit v0.10.2 From e0a5a5d9b006fd441e61685a051fa85d52fb172c Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Tue, 22 Jul 2008 18:14:16 +0200 Subject: x86, 64-bit, dwarf2: push pushes 8 bytes and popf pops 8 The CFI_ADJUST_CFA_OFFSET dwarf2 annotation of a push/popf pair in ret_from_fork wrongly used a value of 4. It should have been 8. Fix that. Signed-off-by: Alexander van Heukelum Cc: Andi Kleen Cc: heukelum@fastmail.fm Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 89434d4..cf3a0b2 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -275,9 +275,9 @@ ENTRY(native_usergs_sysret64) ENTRY(ret_from_fork) CFI_DEFAULT_STACK push kernel_eflags(%rip) - CFI_ADJUST_CFA_OFFSET 4 + CFI_ADJUST_CFA_OFFSET 8 popf # reset kernel eflags - CFI_ADJUST_CFA_OFFSET -4 + CFI_ADJUST_CFA_OFFSET -8 call schedule_tail GET_THREAD_INFO(%rcx) testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx) -- cgit v0.10.2 From 32f71aff77b6470d272f80ac28f43f9601c4d140 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Mon, 21 Jul 2008 00:52:49 +0100 Subject: x86: PIC, L-APIC and I/O APIC debug information Dump all the PIC, local APIC and I/O APIC information at the fs_initcall() level, which is after ACPI (if used) has initialised PCI information, making the point of invocation consistent across MP-table and ACPI platforms. Remove explicit calls to print_IO_APIC() from elsewhere. Make the interface of all the functions involved consistent between 32-bit and 64-bit versions and make them all static by default by the means of a New-and-Improved(TM) __apicdebuginit() macro. Note that like print_IO_APIC() all these only output anything if "apic=debug" has been passed to the kernel through the command line. Signed-off-by: Maciej W. Rozycki Cc: Chuck Ebbert Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index de9aa0e..d54455e 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -50,6 +50,8 @@ #include #include +#define __apicdebuginit(type) static type __init + int (*ioapic_renumber_irq)(int ioapic, int irq); atomic_t irq_mis_count; @@ -1345,7 +1347,8 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, ioapic_write_entry(apic, pin, entry); } -void __init print_IO_APIC(void) + +__apicdebuginit(void) print_IO_APIC(void) { int apic, i; union IO_APIC_reg_00 reg_00; @@ -1460,9 +1463,7 @@ void __init print_IO_APIC(void) return; } -#if 0 - -static void print_APIC_bitfield(int base) +__apicdebuginit(void) print_APIC_bitfield(int base) { unsigned int v; int i, j; @@ -1483,7 +1484,7 @@ static void print_APIC_bitfield(int base) } } -void /*__init*/ print_local_APIC(void *dummy) +__apicdebuginit(void) print_local_APIC(void *dummy) { unsigned int v, ver, maxlvt; @@ -1567,12 +1568,12 @@ void /*__init*/ print_local_APIC(void *dummy) printk("\n"); } -void print_all_local_APICs(void) +__apicdebuginit(void) print_all_local_APICs(void) { on_each_cpu(print_local_APIC, NULL, 1); } -void /*__init*/ print_PIC(void) +__apicdebuginit(void) print_PIC(void) { unsigned int v; unsigned long flags; @@ -1604,7 +1605,17 @@ void /*__init*/ print_PIC(void) printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); } -#endif /* 0 */ +__apicdebuginit(int) print_all_ICs(void) +{ + print_PIC(); + print_all_local_APICs(); + print_IO_APIC(); + + return 0; +} + +fs_initcall(print_all_ICs); + static void __init enable_IO_APIC(void) { @@ -2333,8 +2344,6 @@ void __init setup_IO_APIC(void) setup_IO_APIC_irqs(); init_IO_APIC_traps(); check_timer(); - if (!acpi_ioapic) - print_IO_APIC(); } /* diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 8269434..8cdcc4f 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -53,6 +53,8 @@ #include #include +#define __apicdebuginit(type) static type __init + struct irq_cfg { cpumask_t domain; cpumask_t old_domain; @@ -87,8 +89,6 @@ int first_system_vector = 0xfe; char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE}; -#define __apicdebuginit __init - int sis_apic_bug; /* not actually supported, dummy for compile */ static int no_timer_check; @@ -965,7 +965,8 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, ioapic_write_entry(apic, pin, entry); } -void __apicdebuginit print_IO_APIC(void) + +__apicdebuginit(void) print_IO_APIC(void) { int apic, i; union IO_APIC_reg_00 reg_00; @@ -1059,9 +1060,7 @@ void __apicdebuginit print_IO_APIC(void) return; } -#if 0 - -static __apicdebuginit void print_APIC_bitfield (int base) +__apicdebuginit(void) print_APIC_bitfield(int base) { unsigned int v; int i, j; @@ -1082,7 +1081,7 @@ static __apicdebuginit void print_APIC_bitfield (int base) } } -void __apicdebuginit print_local_APIC(void * dummy) +__apicdebuginit(void) print_local_APIC(void *dummy) { unsigned int v, ver, maxlvt; @@ -1159,12 +1158,12 @@ void __apicdebuginit print_local_APIC(void * dummy) printk("\n"); } -void print_all_local_APICs (void) +__apicdebuginit(void) print_all_local_APICs(void) { on_each_cpu(print_local_APIC, NULL, 1); } -void __apicdebuginit print_PIC(void) +__apicdebuginit(void) print_PIC(void) { unsigned int v; unsigned long flags; @@ -1196,7 +1195,17 @@ void __apicdebuginit print_PIC(void) printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); } -#endif /* 0 */ +__apicdebuginit(int) print_all_ICs(void) +{ + print_PIC(); + print_all_local_APICs(); + print_IO_APIC(); + + return 0; +} + +fs_initcall(print_all_ICs); + void __init enable_IO_APIC(void) { @@ -1849,8 +1858,6 @@ void __init setup_IO_APIC(void) setup_IO_APIC_irqs(); init_IO_APIC_traps(); check_timer(); - if (!acpi_ioapic) - print_IO_APIC(); } struct sysfs_ioapic_data { diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 19af069..1d88d2b 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -250,10 +250,5 @@ int __init pci_acpi_init(void) acpi_pci_irq_enable(dev); } -#ifdef CONFIG_X86_IO_APIC - if (acpi_ioapic) - print_IO_APIC(); -#endif - return 0; } diff --git a/include/asm-x86/hw_irq.h b/include/asm-x86/hw_irq.h index 77ba51d..83e0048 100644 --- a/include/asm-x86/hw_irq.h +++ b/include/asm-x86/hw_irq.h @@ -64,7 +64,6 @@ extern unsigned long io_apic_irqs; extern void init_VISWS_APIC_irqs(void); extern void setup_IO_APIC(void); extern void disable_IO_APIC(void); -extern void print_IO_APIC(void); extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn); extern void setup_ioapic_dest(void); -- cgit v0.10.2 From 0791e13fbb1ea4e1808d055922c3f116b924bdc9 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Mon, 21 Jul 2008 01:28:43 +0100 Subject: x86: fix up a comment in ack_APIC_irq() Adjust a comment in ack_APIC_irq() according to the recent removal of CONFIG_X86_GOOD_APIC. Signed-off-by: Maciej W. Rozycki Signed-off-by: Ingo Molnar diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h index 133c998..e9e09b2 100644 --- a/include/asm-x86/apic.h +++ b/include/asm-x86/apic.h @@ -76,9 +76,7 @@ extern int get_physical_broadcast(void); static inline void ack_APIC_irq(void) { /* - * ack_APIC_irq() actually gets compiled as a single instruction: - * - a single rmw on Pentium/82489DX - * - a single write on P6+ cores (CONFIG_X86_GOOD_APIC) + * ack_APIC_irq() actually gets compiled as a single instruction * ... yummie. */ -- cgit v0.10.2 From 510b37258dfd61693ca6c039865c78bd996e3718 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 22 Jul 2008 13:20:22 -0700 Subject: x86: move declaring x2apic_extra_bits it is for uv only Signed-off-by: Yinghai Lu Cc: Jack Steiner Cc: Suresh Siddha Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index f35a3bf..3229c68 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -10,8 +10,6 @@ #include #include -DEFINE_PER_CPU(int, x2apic_extra_bits); - static int x2apic_phys; static int set_x2apic_phys_mode(char *arg) diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 14a97727..169388c 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -26,6 +26,8 @@ #include #include +DEFINE_PER_CPU(int, x2apic_extra_bits); + static enum uv_system_type uv_system_type; static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) -- cgit v0.10.2 From 36a028de785c6e6f15ac84f8b3b087b89137ea26 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 24 Jul 2008 13:52:28 +0200 Subject: x86: apic unification - merge down lapic_get_maxlvt No code change on binary level. Signed-off-by: Cyrill Gorcunov Cc: macro@linux-mips.org Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index d6c8983..447dd8c 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -193,9 +193,13 @@ int get_physical_broadcast(void) */ int lapic_get_maxlvt(void) { - unsigned int v = apic_read(APIC_LVR); + unsigned int v; - /* 82489DXs do not report # of LVT entries. */ + v = apic_read(APIC_LVR); + /* + * - we always have APIC integrated on 64bit mode + * - 82489DXs do not report # of LVT entries + */ return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2; } diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 7f1f030..4fa2a86 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -158,11 +158,14 @@ void __cpuinit enable_NMI_through_LVT0(void) */ int lapic_get_maxlvt(void) { - unsigned int v, maxlvt; + unsigned int v; v = apic_read(APIC_LVR); - maxlvt = GET_APIC_MAXLVT(v); - return maxlvt; + /* + * - we always have APIC integrated on 64bit mode + * - 82489DXs do not report # of LVT entries + */ + return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2; } /* -- cgit v0.10.2 From d4c63ec060f3315653c0ae5bc3a7fe2419a2282f Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 24 Jul 2008 13:52:29 +0200 Subject: x86: apic unification - merge down enable_NMI_through_LVT0 No code change on binary level. Signed-off-by: Cyrill Gorcunov Cc: macro@linux-mips.org Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 447dd8c..01708f1 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -172,11 +172,15 @@ u32 safe_apic_wait_icr_idle(void) */ void __cpuinit enable_NMI_through_LVT0(void) { - unsigned int v = APIC_DM_NMI; + unsigned int v; + + /* unmask and set to NMI */ + v = APIC_DM_NMI; - /* Level triggered for 82489DX */ + /* Level triggered for 82489DX (32bit mode) */ if (!lapic_is_integrated()) v |= APIC_LVT_LEVEL_TRIGGER; + apic_write(APIC_LVT0, v); } diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 4fa2a86..7615b4b 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -150,6 +150,11 @@ void __cpuinit enable_NMI_through_LVT0(void) /* unmask and set to NMI */ v = APIC_DM_NMI; + + /* Level triggered for 82489DX (32bit mode) */ + if (!lapic_is_integrated()) + v |= APIC_LVT_LEVEL_TRIGGER; + apic_write(APIC_LVT0, v); } -- cgit v0.10.2 From 461d159694d683c9e43ead07720a3a0f17f6f060 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Date: Fri, 25 Jul 2008 09:19:05 +0530 Subject: x86_64: Declare new_utsname in asm-x86/syscalls.h Signed-off-by: Jaswinder Singh diff --git a/include/asm-x86/syscalls.h b/include/asm-x86/syscalls.h index 3d6b159..87803da 100644 --- a/include/asm-x86/syscalls.h +++ b/include/asm-x86/syscalls.h @@ -5,7 +5,6 @@ * * This file is released under the GPLv2. * See the file COPYING for more details. - * */ #ifndef _ASM_X86_SYSCALLS_H @@ -87,6 +86,7 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *); /* kernel/sys_x86_64.c */ asmlinkage long sys_mmap(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); +struct new_utsname; asmlinkage long sys_uname(struct new_utsname __user *); #endif /* CONFIG_X86_32 */ -- cgit v0.10.2 From 4fe702c7e401f912c0edd294af6e37c02f451bbb Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Date: Fri, 25 Jul 2008 10:19:27 +0530 Subject: X86_32: declare pt_regs_access as unsigned long Fixed pt_regs_access to unsigned long as per X86_64 Signed-off-by: Jaswinder Singh diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index e37dccc..fc3e8dc 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -69,7 +69,7 @@ static inline bool invalid_selector(u16 value) #define FLAG_MASK FLAG_MASK_32 -static long *pt_regs_access(struct pt_regs *regs, unsigned long regno) +static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno) { BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0); regno >>= 2; -- cgit v0.10.2 From e7f08dfdaa82def3d685d16fdb99203cbb67ec95 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Date: Fri, 25 Jul 2008 10:42:26 +0530 Subject: X86_SMP: smp.c declare functions before they get used declared following smp interrupts in asm-x86/hw_irq.h: smp_reschedule_interrupt, smp_call_function_interrupt, smp_call_function_single_interrupt Signed-off-by: Jaswinder Singh diff --git a/include/asm-x86/hw_irq.h b/include/asm-x86/hw_irq.h index 40b941a..08900eb 100644 --- a/include/asm-x86/hw_irq.h +++ b/include/asm-x86/hw_irq.h @@ -102,6 +102,11 @@ extern void smp_error_interrupt(struct pt_regs *); extern asmlinkage void smp_spurious_interrupt(void); extern asmlinkage void smp_error_interrupt(void); #endif +#ifdef CONFIG_X86_SMP +extern void smp_reschedule_interrupt(struct pt_regs *); +extern void smp_call_function_interrupt(struct pt_regs *); +extern void smp_call_function_single_interrupt(struct pt_regs *); +#endif #ifdef CONFIG_X86_32 extern void (*const interrupt[NR_IRQS])(void); -- cgit v0.10.2 From f86c99853b22576ee8dc4fa27ff6f3c0c7ce0ef8 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Date: Fri, 25 Jul 2008 10:52:53 +0530 Subject: X86_SMP: smpboot.c declare idle_thread_array and smp_b_stepping as static Signed-off-by: Jaswinder Singh diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 4b53a64..a9331a4 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -88,7 +88,7 @@ static DEFINE_PER_CPU(struct task_struct *, idle_thread_array); #define get_idle_for_cpu(x) (per_cpu(idle_thread_array, x)) #define set_idle_for_cpu(x, p) (per_cpu(idle_thread_array, x) = (p)) #else -struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ; +static struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ; #define get_idle_for_cpu(x) (idle_thread_array[(x)]) #define set_idle_for_cpu(x, p) (idle_thread_array[(x)] = (p)) #endif @@ -129,7 +129,7 @@ static int boot_cpu_logical_apicid; static cpumask_t cpu_sibling_setup_map; /* Set if we find a B stepping CPU */ -int __cpuinitdata smp_b_stepping; +static int __cpuinitdata smp_b_stepping; #if defined(CONFIG_NUMA) && defined(CONFIG_X86_32) -- cgit v0.10.2 From 2907829cd0ffdef69083985ba28cf1cf3857c681 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Date: Fri, 25 Jul 2008 11:05:56 +0530 Subject: X86_SMP: ipi.c declare functions before they get used move upwards for __send_IPI_shortcut and send_IPI_mask_bitmask Signed-off-by: Jaswinder Singh diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c index 3f7537b..f1c688e 100644 --- a/arch/x86/kernel/ipi.c +++ b/arch/x86/kernel/ipi.c @@ -20,6 +20,8 @@ #ifdef CONFIG_X86_32 #include +#include + /* * the following functions deal with sending IPIs between CPUs. * @@ -147,7 +149,6 @@ void send_IPI_mask_sequence(cpumask_t mask, int vector) } /* must come after the send_IPI functions above for inlining */ -#include static int convert_apicid_to_cpu(int apic_id) { int i; -- cgit v0.10.2 From b2139aa0eec330c711c5a279db361e5ef1178e78 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Date: Fri, 25 Jul 2008 11:32:38 +0530 Subject: X86_SMP: tlb_XX.c declare smp_invalidate_interrupt before they get used declare smp_invalidate_interrupt in asm-x86/hw_irq.h for X86_32 and X86_64 Signed-off-by: Jaswinder Singh diff --git a/include/asm-x86/hw_irq.h b/include/asm-x86/hw_irq.h index 08900eb..67685f1 100644 --- a/include/asm-x86/hw_irq.h +++ b/include/asm-x86/hw_irq.h @@ -106,6 +106,11 @@ extern asmlinkage void smp_error_interrupt(void); extern void smp_reschedule_interrupt(struct pt_regs *); extern void smp_call_function_interrupt(struct pt_regs *); extern void smp_call_function_single_interrupt(struct pt_regs *); +#ifdef CONFIG_X86_32 +extern void smp_invalidate_interrupt(struct pt_regs *); +#else +extern asmlinkage void smp_invalidate_interrupt(struct pt_regs *); +#endif #endif #ifdef CONFIG_X86_32 -- cgit v0.10.2 From 3c9339049df5cc3a468c11de6c4101a1ea8c3d83 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 25 Jul 2008 11:32:36 +0200 Subject: x86: fix ds.c build error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix: arch/x86/kernel/ds.c: In function ‘ds_allocate_buffer': arch/x86/kernel/ds.c:339: error: implicit declaration of function ‘PAGE_ALIGN' Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 24a323c..ab21c27 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -29,6 +29,7 @@ #include #include #include +#include /* -- cgit v0.10.2 From 021f8b75e78f9da67421a2c2e320e8934a90914a Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 22 Jul 2008 21:08:45 +0200 Subject: x86: add PCI IDs for AMD Barcelona PCI devices Signed-off-by: Robert Richter Cc: oprofile-list Cc: Barry Kasindorf Signed-off-by: Ingo Molnar diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index c3b1761..917d48e 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -497,6 +497,11 @@ #define PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP 0x1101 #define PCI_DEVICE_ID_AMD_K8_NB_MEMCTL 0x1102 #define PCI_DEVICE_ID_AMD_K8_NB_MISC 0x1103 +#define PCI_DEVICE_ID_AMD_10H_NB_HT 0x1200 +#define PCI_DEVICE_ID_AMD_10H_NB_MAP 0x1201 +#define PCI_DEVICE_ID_AMD_10H_NB_DRAM 0x1202 +#define PCI_DEVICE_ID_AMD_10H_NB_MISC 0x1203 +#define PCI_DEVICE_ID_AMD_10H_NB_LINK 0x1204 #define PCI_DEVICE_ID_AMD_LANCE 0x2000 #define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001 #define PCI_DEVICE_ID_AMD_SCSI 0x2020 -- cgit v0.10.2 From 286f571837ba9d67625afd015366d79345abb414 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 22 Jul 2008 21:08:46 +0200 Subject: x86: apic_*.c: add description to AMD's extended LVT functions Signed-off-by: Robert Richter Cc: oprofile-list Cc: Barry Kasindorf Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index d6c8983..fad94b0 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -646,6 +646,9 @@ int setup_profiling_timer(unsigned int multiplier) * * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and * MCE interrupts are supported. Thus MCE offset must be set to 0. + * + * If mask=1, the LVT entry does not generate interrupts while mask=0 + * enables the vector. See also the BKDGs. */ #define APIC_EILVT_LVTOFF_MCE 0 diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 7f1f030..42bf69f 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -205,6 +205,9 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) * * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and * MCE interrupts are supported. Thus MCE offset must be set to 0. + * + * If mask=1, the LVT entry does not generate interrupts while mask=0 + * enables the vector. See also the BKDGs. */ #define APIC_EILVT_LVTOFF_MCE 0 -- cgit v0.10.2 From 12f2b2610e812627acf338aaf043fef20bb726ca Mon Sep 17 00:00:00 2001 From: Barry Kasindorf Date: Tue, 22 Jul 2008 21:08:47 +0200 Subject: oprofile: Add support for AMD Family 11h This patch add support for AMD Family 11h CPUs. Signed-off-by: Barry Kasindorf Signed-off-by: Robert Richter Cc: oprofile-list Signed-off-by: Ingo Molnar diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 3f90289..33db99a 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -436,6 +436,10 @@ int __init op_nmi_init(struct oprofile_operations *ops) model = &op_athlon_spec; cpu_type = "x86-64/family10"; break; + case 0x11: + model = &op_athlon_spec; + cpu_type = "x86-64/family11h"; + break; } break; -- cgit v0.10.2 From adf5ec0bca553b763a6b9baed2677a4c7470025b Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 22 Jul 2008 21:08:48 +0200 Subject: x86/oprofile: introduce model specific init/exit functions This patch implements model specific OProfile init/exit functions for x86 CPUs. Though there is more rework needed at the initialization code, this new introduced functions allow it to keep model specific code in the corresponding op_model_*.c files. The function interface is the same as for oprofile_arch_init/exit(). Signed-off-by: Robert Richter Cc: oprofile-list Signed-off-by: Ingo Molnar diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 33db99a..75e8891 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -1,10 +1,11 @@ /** * @file nmi_int.c * - * @remark Copyright 2002 OProfile authors + * @remark Copyright 2002-2008 OProfile authors * @remark Read the file COPYING * * @author John Levon + * @author Robert Richter */ #include @@ -411,6 +412,7 @@ int __init op_nmi_init(struct oprofile_operations *ops) __u8 vendor = boot_cpu_data.x86_vendor; __u8 family = boot_cpu_data.x86; char *cpu_type; + int ret = 0; if (!cpu_has_apic) return -ENODEV; @@ -466,6 +468,11 @@ int __init op_nmi_init(struct oprofile_operations *ops) return -ENODEV; } + if (model->init) + ret = model->init(ops); + if (ret) + return ret; + init_sysfs(); using_nmi = 1; ops->create_files = nmi_create_files; @@ -482,4 +489,6 @@ void op_nmi_exit(void) { if (using_nmi) exit_sysfs(); + if (model->exit) + model->exit(); } diff --git a/arch/x86/oprofile/op_model_athlon.c b/arch/x86/oprofile/op_model_athlon.c index 3d53487..dd8b1dc 100644 --- a/arch/x86/oprofile/op_model_athlon.c +++ b/arch/x86/oprofile/op_model_athlon.c @@ -1,14 +1,15 @@ /* - * @file op_model_athlon.h + * @file op_model_athlon.c * athlon / K7 / K8 / Family 10h model-specific MSR operations * - * @remark Copyright 2002 OProfile authors + * @remark Copyright 2002-2008 OProfile authors * @remark Read the file COPYING * * @author John Levon * @author Philippe Elie * @author Graydon Hoare - */ + * @author Robert Richter +*/ #include #include @@ -178,7 +179,18 @@ static void athlon_shutdown(struct op_msrs const * const msrs) } } +static int op_amd_init(struct oprofile_operations *ops) +{ + return 0; +} + +static void op_amd_exit(void) +{ +} + struct op_x86_model_spec const op_athlon_spec = { + .init = op_amd_init, + .exit = op_amd_exit, .num_counters = NUM_COUNTERS, .num_controls = NUM_CONTROLS, .fill_in_addresses = &athlon_fill_in_addresses, diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index 45b605f..ee9ca96 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h @@ -32,6 +32,8 @@ struct pt_regs; * various x86 CPU models' perfctr support. */ struct op_x86_model_spec { + int (*init)(struct oprofile_operations *ops); + void (*exit)(void); unsigned int const num_counters; unsigned int const num_controls; void (*fill_in_addresses)(struct op_msrs * const msrs); -- cgit v0.10.2 From dfa154289701ed315909b4e371a0381c52c8bdc9 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 22 Jul 2008 21:08:49 +0200 Subject: x86/oprofile: Minor changes in op_model_athlon.c Signed-off-by: Robert Richter Cc: oprofile-list Cc: Barry Kasindorf Signed-off-by: Ingo Molnar diff --git a/arch/x86/oprofile/op_model_athlon.c b/arch/x86/oprofile/op_model_athlon.c index dd8b1dc..d25d7f1 100644 --- a/arch/x86/oprofile/op_model_athlon.c +++ b/arch/x86/oprofile/op_model_athlon.c @@ -45,6 +45,8 @@ static unsigned long reset_value[NUM_COUNTERS]; +/* functions for op_athlon_spec */ + static void athlon_fill_in_addresses(struct op_msrs * const msrs) { int i; -- cgit v0.10.2 From 6657fe4f5650ff7174d418d4bfa50c4640e81a2b Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 22 Jul 2008 21:08:50 +0200 Subject: x86/oprofile: renaming athlon_*() into op_amd_*() These functions contain code for all AMD CPUs. The new names fit better. Signed-off-by: Robert Richter Cc: oprofile-list Cc: Barry Kasindorf Signed-off-by: Ingo Molnar diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 75e8891..b298193 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -425,21 +425,21 @@ int __init op_nmi_init(struct oprofile_operations *ops) default: return -ENODEV; case 6: - model = &op_athlon_spec; + model = &op_amd_spec; cpu_type = "i386/athlon"; break; case 0xf: - model = &op_athlon_spec; + model = &op_amd_spec; /* Actually it could be i386/hammer too, but give user space an consistent name. */ cpu_type = "x86-64/hammer"; break; case 0x10: - model = &op_athlon_spec; + model = &op_amd_spec; cpu_type = "x86-64/family10"; break; case 0x11: - model = &op_athlon_spec; + model = &op_amd_spec; cpu_type = "x86-64/family11h"; break; } diff --git a/arch/x86/oprofile/op_model_athlon.c b/arch/x86/oprofile/op_model_athlon.c index d25d7f1..40ecb02 100644 --- a/arch/x86/oprofile/op_model_athlon.c +++ b/arch/x86/oprofile/op_model_athlon.c @@ -45,9 +45,9 @@ static unsigned long reset_value[NUM_COUNTERS]; -/* functions for op_athlon_spec */ +/* functions for op_amd_spec */ -static void athlon_fill_in_addresses(struct op_msrs * const msrs) +static void op_amd_fill_in_addresses(struct op_msrs * const msrs) { int i; @@ -67,7 +67,7 @@ static void athlon_fill_in_addresses(struct op_msrs * const msrs) } -static void athlon_setup_ctrs(struct op_msrs const * const msrs) +static void op_amd_setup_ctrs(struct op_msrs const * const msrs) { unsigned int low, high; int i; @@ -116,7 +116,7 @@ static void athlon_setup_ctrs(struct op_msrs const * const msrs) } -static int athlon_check_ctrs(struct pt_regs * const regs, +static int op_amd_check_ctrs(struct pt_regs * const regs, struct op_msrs const * const msrs) { unsigned int low, high; @@ -137,7 +137,7 @@ static int athlon_check_ctrs(struct pt_regs * const regs, } -static void athlon_start(struct op_msrs const * const msrs) +static void op_amd_start(struct op_msrs const * const msrs) { unsigned int low, high; int i; @@ -151,7 +151,7 @@ static void athlon_start(struct op_msrs const * const msrs) } -static void athlon_stop(struct op_msrs const * const msrs) +static void op_amd_stop(struct op_msrs const * const msrs) { unsigned int low, high; int i; @@ -167,7 +167,7 @@ static void athlon_stop(struct op_msrs const * const msrs) } } -static void athlon_shutdown(struct op_msrs const * const msrs) +static void op_amd_shutdown(struct op_msrs const * const msrs) { int i; @@ -190,15 +190,15 @@ static void op_amd_exit(void) { } -struct op_x86_model_spec const op_athlon_spec = { +struct op_x86_model_spec const op_amd_spec = { .init = op_amd_init, .exit = op_amd_exit, .num_counters = NUM_COUNTERS, .num_controls = NUM_CONTROLS, - .fill_in_addresses = &athlon_fill_in_addresses, - .setup_ctrs = &athlon_setup_ctrs, - .check_ctrs = &athlon_check_ctrs, - .start = &athlon_start, - .stop = &athlon_stop, - .shutdown = &athlon_shutdown + .fill_in_addresses = &op_amd_fill_in_addresses, + .setup_ctrs = &op_amd_setup_ctrs, + .check_ctrs = &op_amd_check_ctrs, + .start = &op_amd_start, + .stop = &op_amd_stop, + .shutdown = &op_amd_shutdown }; diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index ee9ca96..05a0261 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h @@ -48,6 +48,6 @@ struct op_x86_model_spec { extern struct op_x86_model_spec const op_ppro_spec; extern struct op_x86_model_spec const op_p4_spec; extern struct op_x86_model_spec const op_p4_ht2_spec; -extern struct op_x86_model_spec const op_athlon_spec; +extern struct op_x86_model_spec const op_amd_spec; #endif /* OP_X86_MODEL_H */ -- cgit v0.10.2 From 73185e0a5d11d729d451692034fbe55a9eba7468 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 22 Jul 2008 21:08:51 +0200 Subject: drivers/oprofile: coding style fixes in buffer_sync.c Signed-off-by: Robert Richter Cc: oprofile-list Cc: Barry Kasindorf Signed-off-by: Ingo Molnar diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c index 9304c45..69a73277 100644 --- a/drivers/oprofile/buffer_sync.c +++ b/drivers/oprofile/buffer_sync.c @@ -33,7 +33,7 @@ #include "event_buffer.h" #include "cpu_buffer.h" #include "buffer_sync.h" - + static LIST_HEAD(dying_tasks); static LIST_HEAD(dead_tasks); static cpumask_t marked_cpus = CPU_MASK_NONE; @@ -48,10 +48,11 @@ static void process_task_mortuary(void); * Can be invoked from softirq via RCU callback due to * call_rcu() of the task struct, hence the _irqsave. */ -static int task_free_notify(struct notifier_block * self, unsigned long val, void * data) +static int +task_free_notify(struct notifier_block *self, unsigned long val, void *data) { unsigned long flags; - struct task_struct * task = data; + struct task_struct *task = data; spin_lock_irqsave(&task_mortuary, flags); list_add(&task->tasks, &dying_tasks); spin_unlock_irqrestore(&task_mortuary, flags); @@ -62,13 +63,14 @@ static int task_free_notify(struct notifier_block * self, unsigned long val, voi /* The task is on its way out. A sync of the buffer means we can catch * any remaining samples for this task. */ -static int task_exit_notify(struct notifier_block * self, unsigned long val, void * data) +static int +task_exit_notify(struct notifier_block *self, unsigned long val, void *data) { /* To avoid latency problems, we only process the current CPU, * hoping that most samples for the task are on this CPU */ sync_buffer(raw_smp_processor_id()); - return 0; + return 0; } @@ -77,11 +79,12 @@ static int task_exit_notify(struct notifier_block * self, unsigned long val, voi * we don't lose any. This does not have to be exact, it's a QoI issue * only. */ -static int munmap_notify(struct notifier_block * self, unsigned long val, void * data) +static int +munmap_notify(struct notifier_block *self, unsigned long val, void *data) { unsigned long addr = (unsigned long)data; - struct mm_struct * mm = current->mm; - struct vm_area_struct * mpnt; + struct mm_struct *mm = current->mm; + struct vm_area_struct *mpnt; down_read(&mm->mmap_sem); @@ -99,11 +102,12 @@ static int munmap_notify(struct notifier_block * self, unsigned long val, void * return 0; } - + /* We need to be told about new modules so we don't attribute to a previously * loaded module, or drop the samples on the floor. */ -static int module_load_notify(struct notifier_block * self, unsigned long val, void * data) +static int +module_load_notify(struct notifier_block *self, unsigned long val, void *data) { #ifdef CONFIG_MODULES if (val != MODULE_STATE_COMING) @@ -118,7 +122,7 @@ static int module_load_notify(struct notifier_block * self, unsigned long val, v return 0; } - + static struct notifier_block task_free_nb = { .notifier_call = task_free_notify, }; @@ -135,7 +139,7 @@ static struct notifier_block module_load_nb = { .notifier_call = module_load_notify, }; - + static void end_sync(void) { end_cpu_work(); @@ -208,14 +212,14 @@ static inline unsigned long fast_get_dcookie(struct path *path) * not strictly necessary but allows oprofile to associate * shared-library samples with particular applications */ -static unsigned long get_exec_dcookie(struct mm_struct * mm) +static unsigned long get_exec_dcookie(struct mm_struct *mm) { unsigned long cookie = NO_COOKIE; - struct vm_area_struct * vma; - + struct vm_area_struct *vma; + if (!mm) goto out; - + for (vma = mm->mmap; vma; vma = vma->vm_next) { if (!vma->vm_file) continue; @@ -235,13 +239,14 @@ out: * sure to do this lookup before a mm->mmap modification happens so * we don't lose track. */ -static unsigned long lookup_dcookie(struct mm_struct * mm, unsigned long addr, off_t * offset) +static unsigned long +lookup_dcookie(struct mm_struct *mm, unsigned long addr, off_t *offset) { unsigned long cookie = NO_COOKIE; - struct vm_area_struct * vma; + struct vm_area_struct *vma; for (vma = find_vma(mm, addr); vma; vma = vma->vm_next) { - + if (addr < vma->vm_start || addr >= vma->vm_end) continue; @@ -265,7 +270,7 @@ static unsigned long lookup_dcookie(struct mm_struct * mm, unsigned long addr, o static unsigned long last_cookie = INVALID_COOKIE; - + static void add_cpu_switch(int i) { add_event_entry(ESCAPE_CODE); @@ -278,16 +283,16 @@ static void add_kernel_ctx_switch(unsigned int in_kernel) { add_event_entry(ESCAPE_CODE); if (in_kernel) - add_event_entry(KERNEL_ENTER_SWITCH_CODE); + add_event_entry(KERNEL_ENTER_SWITCH_CODE); else - add_event_entry(KERNEL_EXIT_SWITCH_CODE); + add_event_entry(KERNEL_EXIT_SWITCH_CODE); } - + static void -add_user_ctx_switch(struct task_struct const * task, unsigned long cookie) +add_user_ctx_switch(struct task_struct const *task, unsigned long cookie) { add_event_entry(ESCAPE_CODE); - add_event_entry(CTX_SWITCH_CODE); + add_event_entry(CTX_SWITCH_CODE); add_event_entry(task->pid); add_event_entry(cookie); /* Another code for daemon back-compat */ @@ -296,7 +301,7 @@ add_user_ctx_switch(struct task_struct const * task, unsigned long cookie) add_event_entry(task->tgid); } - + static void add_cookie_switch(unsigned long cookie) { add_event_entry(ESCAPE_CODE); @@ -304,7 +309,7 @@ static void add_cookie_switch(unsigned long cookie) add_event_entry(cookie); } - + static void add_trace_begin(void) { add_event_entry(ESCAPE_CODE); @@ -319,13 +324,13 @@ static void add_sample_entry(unsigned long offset, unsigned long event) } -static int add_us_sample(struct mm_struct * mm, struct op_sample * s) +static int add_us_sample(struct mm_struct *mm, struct op_sample *s) { unsigned long cookie; off_t offset; - - cookie = lookup_dcookie(mm, s->eip, &offset); - + + cookie = lookup_dcookie(mm, s->eip, &offset); + if (cookie == INVALID_COOKIE) { atomic_inc(&oprofile_stats.sample_lost_no_mapping); return 0; @@ -341,13 +346,13 @@ static int add_us_sample(struct mm_struct * mm, struct op_sample * s) return 1; } - + /* Add a sample to the global event buffer. If possible the * sample is converted into a persistent dentry/offset pair * for later lookup from userspace. */ static int -add_sample(struct mm_struct * mm, struct op_sample * s, int in_kernel) +add_sample(struct mm_struct *mm, struct op_sample *s, int in_kernel) { if (in_kernel) { add_sample_entry(s->eip, s->event); @@ -359,9 +364,9 @@ add_sample(struct mm_struct * mm, struct op_sample * s, int in_kernel) } return 0; } - -static void release_mm(struct mm_struct * mm) + +static void release_mm(struct mm_struct *mm) { if (!mm) return; @@ -370,9 +375,9 @@ static void release_mm(struct mm_struct * mm) } -static struct mm_struct * take_tasks_mm(struct task_struct * task) +static struct mm_struct *take_tasks_mm(struct task_struct *task) { - struct mm_struct * mm = get_task_mm(task); + struct mm_struct *mm = get_task_mm(task); if (mm) down_read(&mm->mmap_sem); return mm; @@ -383,10 +388,10 @@ static inline int is_code(unsigned long val) { return val == ESCAPE_CODE; } - + /* "acquire" as many cpu buffer slots as we can */ -static unsigned long get_slots(struct oprofile_cpu_buffer * b) +static unsigned long get_slots(struct oprofile_cpu_buffer *b) { unsigned long head = b->head_pos; unsigned long tail = b->tail_pos; @@ -412,7 +417,7 @@ static unsigned long get_slots(struct oprofile_cpu_buffer * b) } -static void increment_tail(struct oprofile_cpu_buffer * b) +static void increment_tail(struct oprofile_cpu_buffer *b) { unsigned long new_tail = b->tail_pos + 1; @@ -435,8 +440,8 @@ static void process_task_mortuary(void) { unsigned long flags; LIST_HEAD(local_dead_tasks); - struct task_struct * task; - struct task_struct * ttask; + struct task_struct *task; + struct task_struct *ttask; spin_lock_irqsave(&task_mortuary, flags); @@ -493,7 +498,7 @@ void sync_buffer(int cpu) { struct oprofile_cpu_buffer *cpu_buf = &per_cpu(cpu_buffer, cpu); struct mm_struct *mm = NULL; - struct task_struct * new; + struct task_struct *new; unsigned long cookie = 0; int in_kernel = 1; unsigned int i; @@ -501,7 +506,7 @@ void sync_buffer(int cpu) unsigned long available; mutex_lock(&buffer_mutex); - + add_cpu_switch(cpu); /* Remember, only we can modify tail_pos */ @@ -509,8 +514,8 @@ void sync_buffer(int cpu) available = get_slots(cpu_buf); for (i = 0; i < available; ++i) { - struct op_sample * s = &cpu_buf->buffer[cpu_buf->tail_pos]; - + struct op_sample *s = &cpu_buf->buffer[cpu_buf->tail_pos]; + if (is_code(s->eip)) { if (s->event <= CPU_IS_KERNEL) { /* kernel/userspace switch */ @@ -522,7 +527,7 @@ void sync_buffer(int cpu) state = sb_bt_start; add_trace_begin(); } else { - struct mm_struct * oldmm = mm; + struct mm_struct *oldmm = mm; /* userspace context switch */ new = (struct task_struct *)s->event; @@ -533,13 +538,11 @@ void sync_buffer(int cpu) cookie = get_exec_dcookie(mm); add_user_ctx_switch(new, cookie); } - } else { - if (state >= sb_bt_start && - !add_sample(mm, s, in_kernel)) { - if (state == sb_bt_start) { - state = sb_bt_ignore; - atomic_inc(&oprofile_stats.bt_lost_no_mapping); - } + } else if (state >= sb_bt_start && + !add_sample(mm, s, in_kernel)) { + if (state == sb_bt_start) { + state = sb_bt_ignore; + atomic_inc(&oprofile_stats.bt_lost_no_mapping); } } -- cgit v0.10.2 From 5e11f98dcebc40c9d67e8d21a5f47248444c17a8 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 22 Jul 2008 21:08:52 +0200 Subject: OProfile: moving increment_tail() in buffer_sync.c Signed-off-by: Robert Richter Cc: oprofile-list Cc: Barry Kasindorf Signed-off-by: Ingo Molnar diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c index 69a73277..615929f 100644 --- a/drivers/oprofile/buffer_sync.c +++ b/drivers/oprofile/buffer_sync.c @@ -268,6 +268,17 @@ lookup_dcookie(struct mm_struct *mm, unsigned long addr, off_t *offset) return cookie; } +static void increment_tail(struct oprofile_cpu_buffer *b) +{ + unsigned long new_tail = b->tail_pos + 1; + + rmb(); + + if (new_tail < b->buffer_size) + b->tail_pos = new_tail; + else + b->tail_pos = 0; +} static unsigned long last_cookie = INVALID_COOKIE; @@ -417,19 +428,6 @@ static unsigned long get_slots(struct oprofile_cpu_buffer *b) } -static void increment_tail(struct oprofile_cpu_buffer *b) -{ - unsigned long new_tail = b->tail_pos + 1; - - rmb(); - - if (new_tail < b->buffer_size) - b->tail_pos = new_tail; - else - b->tail_pos = 0; -} - - /* Move tasks along towards death. Any tasks on dead_tasks * will definitely have no remaining references in any * CPU buffers at this point, because we use two lists, -- cgit v0.10.2 From ee648bc77f11b57d15a68d336fc30e343198f893 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 22 Jul 2008 21:08:53 +0200 Subject: OProfile: add IBS code macros Signed-off-by: Robert Richter Cc: oprofile-list Cc: Barry Kasindorf Signed-off-by: Ingo Molnar diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h index 041bb31..bcb8f72 100644 --- a/include/linux/oprofile.h +++ b/include/linux/oprofile.h @@ -36,6 +36,8 @@ #define XEN_ENTER_SWITCH_CODE 10 #define SPU_PROFILING_CODE 11 #define SPU_CTX_SWITCH_CODE 12 +#define IBS_FETCH_CODE 13 +#define IBS_OP_CODE 14 struct super_block; struct dentry; -- cgit v0.10.2 From 345c25730d085c45622ac779da4dbd97dc3a10fe Mon Sep 17 00:00:00 2001 From: Barry Kasindorf Date: Tue, 22 Jul 2008 21:08:54 +0200 Subject: x86/oprofile: add IBS support for AMD CPUs, IBS buffer handling routines This patchset supports the new profiling hardware available in the latest AMD CPUs in the oProfile driver. Signed-off-by: Barry Kasindorf Signed-off-by: Robert Richter Cc: oprofile-list Signed-off-by: Ingo Molnar diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c index 615929f..e1782d2 100644 --- a/drivers/oprofile/buffer_sync.c +++ b/drivers/oprofile/buffer_sync.c @@ -5,6 +5,7 @@ * @remark Read the file COPYING * * @author John Levon + * @author Barry Kasindorf * * This is the core of the buffer management. Each * CPU buffer is processed and entered into the @@ -272,7 +273,7 @@ static void increment_tail(struct oprofile_cpu_buffer *b) { unsigned long new_tail = b->tail_pos + 1; - rmb(); + rmb(); /* be sure fifo pointers are synchromized */ if (new_tail < b->buffer_size) b->tail_pos = new_tail; @@ -327,6 +328,67 @@ static void add_trace_begin(void) add_event_entry(TRACE_BEGIN_CODE); } +#define IBS_FETCH_CODE_SIZE 2 +#define IBS_OP_CODE_SIZE 5 +#define IBS_EIP(offset) \ + (((struct op_sample *)&cpu_buf->buffer[(offset)])->eip) +#define IBS_EVENT(offset) \ + (((struct op_sample *)&cpu_buf->buffer[(offset)])->event) + +/* + * Add IBS fetch and op entries to event buffer + */ +static void add_ibs_begin(struct oprofile_cpu_buffer *cpu_buf, int code, + int in_kernel, struct mm_struct *mm) +{ + unsigned long rip; + int i, count; + unsigned long ibs_cookie = 0; + off_t offset; + + increment_tail(cpu_buf); /* move to RIP entry */ + + rip = IBS_EIP(cpu_buf->tail_pos); + +#ifdef __LP64__ + rip += IBS_EVENT(cpu_buf->tail_pos) << 32; +#endif + + if (mm) { + ibs_cookie = lookup_dcookie(mm, rip, &offset); + + if (ibs_cookie == NO_COOKIE) + offset = rip; + if (ibs_cookie == INVALID_COOKIE) { + atomic_inc(&oprofile_stats.sample_lost_no_mapping); + offset = rip; + } + if (ibs_cookie != last_cookie) { + add_cookie_switch(ibs_cookie); + last_cookie = ibs_cookie; + } + } else + offset = rip; + + add_event_entry(ESCAPE_CODE); + add_event_entry(code); + add_event_entry(offset); /* Offset from Dcookie */ + + /* we send the Dcookie offset, but send the raw Linear Add also*/ + add_event_entry(IBS_EIP(cpu_buf->tail_pos)); + add_event_entry(IBS_EVENT(cpu_buf->tail_pos)); + + if (code == IBS_FETCH_CODE) + count = IBS_FETCH_CODE_SIZE; /*IBS FETCH is 2 int64s*/ + else + count = IBS_OP_CODE_SIZE; /*IBS OP is 5 int64s*/ + + for (i = 0; i < count; i++) { + increment_tail(cpu_buf); + add_event_entry(IBS_EIP(cpu_buf->tail_pos)); + add_event_entry(IBS_EVENT(cpu_buf->tail_pos)); + } +} static void add_sample_entry(unsigned long offset, unsigned long event) { @@ -524,6 +586,14 @@ void sync_buffer(int cpu) } else if (s->event == CPU_TRACE_BEGIN) { state = sb_bt_start; add_trace_begin(); + } else if (s->event == IBS_FETCH_BEGIN) { + state = sb_bt_start; + add_ibs_begin(cpu_buf, + IBS_FETCH_CODE, in_kernel, mm); + } else if (s->event == IBS_OP_BEGIN) { + state = sb_bt_start; + add_ibs_begin(cpu_buf, + IBS_OP_CODE, in_kernel, mm); } else { struct mm_struct *oldmm = mm; diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c index 2450b3a..c9ac4e1 100644 --- a/drivers/oprofile/cpu_buffer.c +++ b/drivers/oprofile/cpu_buffer.c @@ -5,6 +5,7 @@ * @remark Read the file COPYING * * @author John Levon + * @author Barry Kasindorf * * Each CPU has a local buffer that stores PC value/event * pairs. We also log context switches when we notice them. @@ -207,7 +208,7 @@ static int log_sample(struct oprofile_cpu_buffer * cpu_buf, unsigned long pc, return 1; } -static int oprofile_begin_trace(struct oprofile_cpu_buffer * cpu_buf) +static int oprofile_begin_trace(struct oprofile_cpu_buffer *cpu_buf) { if (nr_available_slots(cpu_buf) < 4) { cpu_buf->sample_lost_overflow++; @@ -252,6 +253,71 @@ void oprofile_add_sample(struct pt_regs * const regs, unsigned long event) oprofile_add_ext_sample(pc, regs, event, is_kernel); } +#define MAX_IBS_SAMPLE_SIZE 14 +static int log_ibs_sample(struct oprofile_cpu_buffer *cpu_buf, + unsigned long pc, int is_kernel, unsigned int *ibs, int ibs_code) +{ + struct task_struct *task; + + cpu_buf->sample_received++; + + if (nr_available_slots(cpu_buf) < MAX_IBS_SAMPLE_SIZE) { + cpu_buf->sample_lost_overflow++; + return 0; + } + + is_kernel = !!is_kernel; + + /* notice a switch from user->kernel or vice versa */ + if (cpu_buf->last_is_kernel != is_kernel) { + cpu_buf->last_is_kernel = is_kernel; + add_code(cpu_buf, is_kernel); + } + + /* notice a task switch */ + if (!is_kernel) { + task = current; + + if (cpu_buf->last_task != task) { + cpu_buf->last_task = task; + add_code(cpu_buf, (unsigned long)task); + } + } + + add_code(cpu_buf, ibs_code); + add_sample(cpu_buf, ibs[0], ibs[1]); + add_sample(cpu_buf, ibs[2], ibs[3]); + add_sample(cpu_buf, ibs[4], ibs[5]); + + if (ibs_code == IBS_OP_BEGIN) { + add_sample(cpu_buf, ibs[6], ibs[7]); + add_sample(cpu_buf, ibs[8], ibs[9]); + add_sample(cpu_buf, ibs[10], ibs[11]); + } + + return 1; +} + +void oprofile_add_ibs_sample(struct pt_regs *const regs, + unsigned int * const ibs_sample, u8 code) +{ + int is_kernel = !user_mode(regs); + unsigned long pc = profile_pc(regs); + + struct oprofile_cpu_buffer *cpu_buf = + &per_cpu(cpu_buffer, smp_processor_id()); + + if (!backtrace_depth) { + log_ibs_sample(cpu_buf, pc, is_kernel, ibs_sample, code); + return; + } + + /* if log_sample() fails we can't backtrace since we lost the source + * of this event */ + if (log_ibs_sample(cpu_buf, pc, is_kernel, ibs_sample, code)) + oprofile_ops.backtrace(regs, backtrace_depth); +} + void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event) { struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer); diff --git a/drivers/oprofile/cpu_buffer.h b/drivers/oprofile/cpu_buffer.h index c3e366b..9c44d00 100644 --- a/drivers/oprofile/cpu_buffer.h +++ b/drivers/oprofile/cpu_buffer.h @@ -55,5 +55,7 @@ void cpu_buffer_reset(struct oprofile_cpu_buffer * cpu_buf); /* transient events for the CPU buffer -> event buffer */ #define CPU_IS_KERNEL 1 #define CPU_TRACE_BEGIN 2 +#define IBS_FETCH_BEGIN 3 +#define IBS_OP_BEGIN 4 #endif /* OPROFILE_CPU_BUFFER_H */ -- cgit v0.10.2 From 56784f11df473b4c1d9d0e37777fd7c0b77b6bca Mon Sep 17 00:00:00 2001 From: Barry Kasindorf Date: Tue, 22 Jul 2008 21:08:55 +0200 Subject: x86/oprofile: add IBS support for AMD CPUs, model specific code This patchset supports the new profiling hardware available in the latest AMD CPUs in the oProfile driver. Signed-off-by: Barry Kasindorf Signed-off-by: Robert Richter Cc: oprofile-list Signed-off-by: Ingo Molnar diff --git a/arch/x86/oprofile/op_model_athlon.c b/arch/x86/oprofile/op_model_athlon.c index 40ecb02..229e0b4 100644 --- a/arch/x86/oprofile/op_model_athlon.c +++ b/arch/x86/oprofile/op_model_athlon.c @@ -9,9 +9,13 @@ * @author Philippe Elie * @author Graydon Hoare * @author Robert Richter + * @author Barry Kasindorf */ #include +#include +#include + #include #include #include @@ -43,7 +47,83 @@ #define CTRL_SET_HOST_ONLY(val, h) (val |= ((h & 1) << 9)) #define CTRL_SET_GUEST_ONLY(val, h) (val |= ((h & 1) << 8)) +#define IBS_FETCH_CTL_HIGH_MASK 0xFFFFFFFF +/* high dword bit IbsFetchCtl[bit 49] */ +#define IBS_FETCH_VALID_BIT (1UL << 17) +/* high dword bit IbsFetchCtl[bit 52] */ +#define IBS_FETCH_PHY_ADDR_VALID_BIT (1UL << 20) +/* high dword bit IbsFetchCtl[bit 48] */ +#define IBS_FETCH_ENABLE (1UL << 16) + +#define IBS_FETCH_CTL_CNT_MASK 0x00000000FFFF0000UL +#define IBS_FETCH_CTL_MAX_CNT_MASK 0x000000000000FFFFUL + +/*IbsOpCtl masks/bits */ +#define IBS_OP_VALID_BIT (1ULL<<18) /* IbsOpCtl[bit18] */ +#define IBS_OP_ENABLE (1ULL<<17) /* IBS_OP_ENABLE[bit17]*/ + +/* Codes used in cpu_buffer.c */ +#define IBS_FETCH_BEGIN 3 +#define IBS_OP_BEGIN 4 + +/*IbsOpData3 masks */ +#define IBS_CTL_LVT_OFFSET_VALID_BIT (1ULL<<8) + +/*PCI Extended Configuration Constants */ +/* MSR to set the IBS control register APIC LVT offset */ +#define IBS_LVT_OFFSET_PCI 0x1CC + +struct ibs_fetch_sample { + /* MSRC001_1031 IBS Fetch Linear Address Register */ + unsigned int ibs_fetch_lin_addr_low; + unsigned int ibs_fetch_lin_addr_high; + /* MSRC001_1030 IBS Fetch Control Register */ + unsigned int ibs_fetch_ctl_low; + unsigned int ibs_fetch_ctl_high; + /* MSRC001_1032 IBS Fetch Physical Address Register */ + unsigned int ibs_fetch_phys_addr_low; + unsigned int ibs_fetch_phys_addr_high; +}; + +struct ibs_op_sample { + /* MSRC001_1034 IBS Op Logical Address Register (IbsRIP) */ + unsigned int ibs_op_rip_low; + unsigned int ibs_op_rip_high; + /* MSRC001_1035 IBS Op Data Register */ + unsigned int ibs_op_data1_low; + unsigned int ibs_op_data1_high; + /* MSRC001_1036 IBS Op Data 2 Register */ + unsigned int ibs_op_data2_low; + unsigned int ibs_op_data2_high; + /* MSRC001_1037 IBS Op Data 3 Register */ + unsigned int ibs_op_data3_low; + unsigned int ibs_op_data3_high; + /* MSRC001_1038 IBS DC Linear Address Register (IbsDcLinAd) */ + unsigned int ibs_dc_linear_low; + unsigned int ibs_dc_linear_high; + /* MSRC001_1039 IBS DC Physical Address Register (IbsDcPhysAd) */ + unsigned int ibs_dc_phys_low; + unsigned int ibs_dc_phys_high; +}; + +/* + * unitialize the APIC for the IBS interrupts if needed on AMD Family10h+ +*/ +static void clear_ibs_nmi(void); + static unsigned long reset_value[NUM_COUNTERS]; +static int ibs_allowed; /* AMD Family10h and later */ + +struct op_ibs_config { + unsigned long op_enabled; + unsigned long fetch_enabled; + unsigned long max_cnt_fetch; + unsigned long max_cnt_op; + unsigned long rand_en; + unsigned long dispatched_ops; +}; + +static struct op_ibs_config ibs_config; /* functions for op_amd_spec */ @@ -121,6 +201,8 @@ static int op_amd_check_ctrs(struct pt_regs * const regs, { unsigned int low, high; int i; + struct ibs_fetch_sample ibs_fetch; + struct ibs_op_sample ibs_op; for (i = 0 ; i < NUM_COUNTERS; ++i) { if (!reset_value[i]) @@ -132,6 +214,65 @@ static int op_amd_check_ctrs(struct pt_regs * const regs, } } + /*If AMD and IBS is available */ + if (ibs_allowed && ibs_config.fetch_enabled) { + rdmsr(MSR_AMD64_IBSFETCHCTL, low, high); + if (high & IBS_FETCH_VALID_BIT) { + ibs_fetch.ibs_fetch_ctl_high = high; + ibs_fetch.ibs_fetch_ctl_low = low; + rdmsr(MSR_AMD64_IBSFETCHLINAD, low, high); + ibs_fetch.ibs_fetch_lin_addr_high = high; + ibs_fetch.ibs_fetch_lin_addr_low = low; + rdmsr(MSR_AMD64_IBSFETCHPHYSAD, low, high); + ibs_fetch.ibs_fetch_phys_addr_high = high; + ibs_fetch.ibs_fetch_phys_addr_low = low; + + oprofile_add_ibs_sample(regs, + (unsigned int *)&ibs_fetch, + IBS_FETCH_BEGIN); + + /*reenable the IRQ */ + rdmsr(MSR_AMD64_IBSFETCHCTL, low, high); + high &= ~(IBS_FETCH_VALID_BIT); + high |= IBS_FETCH_ENABLE; + low &= IBS_FETCH_CTL_MAX_CNT_MASK; + wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); + } + } + + if (ibs_allowed && ibs_config.op_enabled) { + rdmsr(MSR_AMD64_IBSOPCTL, low, high); + if (low & IBS_OP_VALID_BIT) { + rdmsr(MSR_AMD64_IBSOPRIP, low, high); + ibs_op.ibs_op_rip_low = low; + ibs_op.ibs_op_rip_high = high; + rdmsr(MSR_AMD64_IBSOPDATA, low, high); + ibs_op.ibs_op_data1_low = low; + ibs_op.ibs_op_data1_high = high; + rdmsr(MSR_AMD64_IBSOPDATA2, low, high); + ibs_op.ibs_op_data2_low = low; + ibs_op.ibs_op_data2_high = high; + rdmsr(MSR_AMD64_IBSOPDATA3, low, high); + ibs_op.ibs_op_data3_low = low; + ibs_op.ibs_op_data3_high = high; + rdmsr(MSR_AMD64_IBSDCLINAD, low, high); + ibs_op.ibs_dc_linear_low = low; + ibs_op.ibs_dc_linear_high = high; + rdmsr(MSR_AMD64_IBSDCPHYSAD, low, high); + ibs_op.ibs_dc_phys_low = low; + ibs_op.ibs_dc_phys_high = high; + + /* reenable the IRQ */ + oprofile_add_ibs_sample(regs, + (unsigned int *)&ibs_op, + IBS_OP_BEGIN); + rdmsr(MSR_AMD64_IBSOPCTL, low, high); + low &= ~(IBS_OP_VALID_BIT); + low |= IBS_OP_ENABLE; + wrmsr(MSR_AMD64_IBSOPCTL, low, high); + } + } + /* See op_model_ppro.c */ return 1; } @@ -148,6 +289,17 @@ static void op_amd_start(struct op_msrs const * const msrs) CTRL_WRITE(low, high, msrs, i); } } + if (ibs_allowed && ibs_config.fetch_enabled) { + low = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF; + high = IBS_FETCH_ENABLE; + wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); + } + + if (ibs_allowed && ibs_config.op_enabled) { + low = ((ibs_config.max_cnt_op >> 4) & 0xFFFF) + IBS_OP_ENABLE; + high = 0; + wrmsr(MSR_AMD64_IBSOPCTL, low, high); + } } @@ -165,6 +317,18 @@ static void op_amd_stop(struct op_msrs const * const msrs) CTRL_SET_INACTIVE(low); CTRL_WRITE(low, high, msrs, i); } + + if (ibs_allowed && ibs_config.fetch_enabled) { + low = 0; /* clear max count and enable */ + high = 0; + wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); + } + + if (ibs_allowed && ibs_config.op_enabled) { + low = 0; /* clear max count and enable */ + high = 0; + wrmsr(MSR_AMD64_IBSOPCTL, low, high); + } } static void op_amd_shutdown(struct op_msrs const * const msrs) @@ -181,6 +345,99 @@ static void op_amd_shutdown(struct op_msrs const * const msrs) } } +static inline void apic_init_ibs_nmi_per_cpu(void *arg) +{ + setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_NMI, 0); +} + +static inline void apic_clear_ibs_nmi_per_cpu(void *arg) +{ + setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_FIX, 1); +} + +/* + * initialize the APIC for the IBS interrupts + * if needed on AMD Family10h rev B0 and later + */ +static void setup_ibs(void) +{ + struct pci_dev *gh_device = NULL; + u32 low, high; + u8 vector; + + ibs_allowed = boot_cpu_has(X86_FEATURE_IBS); + + if (!ibs_allowed) + return; + + /* This gets the APIC_EILVT_LVTOFF_IBS value */ + vector = setup_APIC_eilvt_ibs(0, 0, 1); + + /*see if the IBS control register is already set correctly*/ + /*remove this when we know for sure it is done + in the kernel init*/ + rdmsr(MSR_AMD64_IBSCTL, low, high); + if ((low & (IBS_CTL_LVT_OFFSET_VALID_BIT | vector)) != + (IBS_CTL_LVT_OFFSET_VALID_BIT | vector)) { + + /**** Be sure to run loop until NULL is returned to + decrement reference count on any pci_dev structures + returned ****/ + while ((gh_device = pci_get_device(PCI_VENDOR_ID_AMD, + PCI_DEVICE_ID_AMD_10H_NB_MISC, gh_device)) + != NULL) { + /* This code may change if we can find a proper + * way to get at the PCI extended config space */ + pci_write_config_dword( + gh_device, IBS_LVT_OFFSET_PCI, + (vector | IBS_CTL_LVT_OFFSET_VALID_BIT)); + } + } + on_each_cpu(apic_init_ibs_nmi_per_cpu, NULL, 1, 1); +} + + +/* + * unitialize the APIC for the IBS interrupts if needed on AMD Family10h + * rev B0 and later */ +static void clear_ibs_nmi(void) +{ + if (ibs_allowed) + on_each_cpu(apic_clear_ibs_nmi_per_cpu, NULL, 1, 1); +} + +static void setup_ibs_files(struct super_block *sb, struct dentry *root) +{ + char buf[12]; + struct dentry *dir; + + if (!ibs_allowed) + return; + + /* setup some reasonable defaults */ + ibs_config.max_cnt_fetch = 250000; + ibs_config.fetch_enabled = 0; + ibs_config.max_cnt_op = 250000; + ibs_config.op_enabled = 0; + ibs_config.dispatched_ops = 1; + snprintf(buf, sizeof(buf), "ibs_fetch"); + dir = oprofilefs_mkdir(sb, root, buf); + oprofilefs_create_ulong(sb, dir, "rand_enable", + &ibs_config.rand_en); + oprofilefs_create_ulong(sb, dir, "enable", + &ibs_config.fetch_enabled); + oprofilefs_create_ulong(sb, dir, "max_count", + &ibs_config.max_cnt_fetch); + snprintf(buf, sizeof(buf), "ibs_uops"); + dir = oprofilefs_mkdir(sb, root, buf); + oprofilefs_create_ulong(sb, dir, "enable", + &ibs_config.op_enabled); + oprofilefs_create_ulong(sb, dir, "max_count", + &ibs_config.max_cnt_op); + oprofilefs_create_ulong(sb, dir, "dispatched_ops", + &ibs_config.dispatched_ops); +} + static int op_amd_init(struct oprofile_operations *ops) { return 0; -- cgit v0.10.2 From 7939d2bf7e30353d40d14f967b7fe2b2a7be5bd9 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 22 Jul 2008 21:08:56 +0200 Subject: x86/oprofile: separating the IBS handler Signed-off-by: Robert Richter Cc: oprofile-list Cc: Barry Kasindorf Signed-off-by: Ingo Molnar diff --git a/arch/x86/oprofile/op_model_athlon.c b/arch/x86/oprofile/op_model_athlon.c index 229e0b4..a2c8e2e 100644 --- a/arch/x86/oprofile/op_model_athlon.c +++ b/arch/x86/oprofile/op_model_athlon.c @@ -195,27 +195,18 @@ static void op_amd_setup_ctrs(struct op_msrs const * const msrs) } } - -static int op_amd_check_ctrs(struct pt_regs * const regs, - struct op_msrs const * const msrs) +static inline int +op_amd_handle_ibs(struct pt_regs * const regs, + struct op_msrs const * const msrs) { unsigned int low, high; - int i; struct ibs_fetch_sample ibs_fetch; struct ibs_op_sample ibs_op; - for (i = 0 ; i < NUM_COUNTERS; ++i) { - if (!reset_value[i]) - continue; - CTR_READ(low, high, msrs, i); - if (CTR_OVERFLOWED(low)) { - oprofile_add_sample(regs, i); - CTR_WRITE(reset_value[i], msrs, i); - } - } + if (!ibs_allowed) + return 1; - /*If AMD and IBS is available */ - if (ibs_allowed && ibs_config.fetch_enabled) { + if (ibs_config.fetch_enabled) { rdmsr(MSR_AMD64_IBSFETCHCTL, low, high); if (high & IBS_FETCH_VALID_BIT) { ibs_fetch.ibs_fetch_ctl_high = high; @@ -240,7 +231,7 @@ static int op_amd_check_ctrs(struct pt_regs * const regs, } } - if (ibs_allowed && ibs_config.op_enabled) { + if (ibs_config.op_enabled) { rdmsr(MSR_AMD64_IBSOPCTL, low, high); if (low & IBS_OP_VALID_BIT) { rdmsr(MSR_AMD64_IBSOPRIP, low, high); @@ -273,10 +264,30 @@ static int op_amd_check_ctrs(struct pt_regs * const regs, } } - /* See op_model_ppro.c */ return 1; } +static int op_amd_check_ctrs(struct pt_regs * const regs, + struct op_msrs const * const msrs) +{ + unsigned int low, high; + int i; + + for (i = 0 ; i < NUM_COUNTERS; ++i) { + if (!reset_value[i]) + continue; + CTR_READ(low, high, msrs, i); + if (CTR_OVERFLOWED(low)) { + oprofile_add_sample(regs, i); + CTR_WRITE(reset_value[i], msrs, i); + } + } + + op_amd_handle_ibs(regs, msrs); + + /* See op_model_ppro.c */ + return 1; +} static void op_amd_start(struct op_msrs const * const msrs) { -- cgit v0.10.2 From 7d77f2dcae37cf232950cd0181fb0a2cddb18130 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 22 Jul 2008 21:08:57 +0200 Subject: OProfile: change IBS interrupt initialization Signed-off-by: Robert Richter Cc: oprofile-list Cc: Barry Kasindorf Signed-off-by: Ingo Molnar diff --git a/arch/x86/oprofile/op_model_athlon.c b/arch/x86/oprofile/op_model_athlon.c index a2c8e2e..90193b1 100644 --- a/arch/x86/oprofile/op_model_athlon.c +++ b/arch/x86/oprofile/op_model_athlon.c @@ -356,9 +356,11 @@ static void op_amd_shutdown(struct op_msrs const * const msrs) } } +static u8 ibs_eilvt_off; + static inline void apic_init_ibs_nmi_per_cpu(void *arg) { - setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_NMI, 0); + ibs_eilvt_off = setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_NMI, 0); } static inline void apic_clear_ibs_nmi_per_cpu(void *arg) @@ -366,45 +368,67 @@ static inline void apic_clear_ibs_nmi_per_cpu(void *arg) setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_FIX, 1); } +static int pfm_amd64_setup_eilvt(void) +{ +#define IBSCTL_LVTOFFSETVAL (1 << 8) +#define IBSCTL 0x1cc + struct pci_dev *cpu_cfg; + int nodes; + u32 value = 0; + + /* per CPU setup */ + on_each_cpu(apic_init_ibs_nmi_per_cpu, NULL, 0, 1); + + nodes = 0; + cpu_cfg = NULL; + do { + cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD, + PCI_DEVICE_ID_AMD_10H_NB_MISC, + cpu_cfg); + if (!cpu_cfg) + break; + ++nodes; + pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off + | IBSCTL_LVTOFFSETVAL); + pci_read_config_dword(cpu_cfg, IBSCTL, &value); + if (value != (ibs_eilvt_off | IBSCTL_LVTOFFSETVAL)) { + printk(KERN_DEBUG "Failed to setup IBS LVT offset, " + "IBSCTL = 0x%08x", value); + return 1; + } + } while (1); + + if (!nodes) { + printk(KERN_DEBUG "No CPU node configured for IBS"); + return 1; + } + +#ifdef CONFIG_NUMA + /* Sanity check */ + /* Works only for 64bit with proper numa implementation. */ + if (nodes != num_possible_nodes()) { + printk(KERN_DEBUG "Failed to setup CPU node(s) for IBS, " + "found: %d, expected %d", + nodes, num_possible_nodes()); + return 1; + } +#endif + return 0; +} + /* * initialize the APIC for the IBS interrupts - * if needed on AMD Family10h rev B0 and later + * if available (AMD Family10h rev B0 and later) */ static void setup_ibs(void) { - struct pci_dev *gh_device = NULL; - u32 low, high; - u8 vector; - ibs_allowed = boot_cpu_has(X86_FEATURE_IBS); if (!ibs_allowed) return; - /* This gets the APIC_EILVT_LVTOFF_IBS value */ - vector = setup_APIC_eilvt_ibs(0, 0, 1); - - /*see if the IBS control register is already set correctly*/ - /*remove this when we know for sure it is done - in the kernel init*/ - rdmsr(MSR_AMD64_IBSCTL, low, high); - if ((low & (IBS_CTL_LVT_OFFSET_VALID_BIT | vector)) != - (IBS_CTL_LVT_OFFSET_VALID_BIT | vector)) { - - /**** Be sure to run loop until NULL is returned to - decrement reference count on any pci_dev structures - returned ****/ - while ((gh_device = pci_get_device(PCI_VENDOR_ID_AMD, - PCI_DEVICE_ID_AMD_10H_NB_MISC, gh_device)) - != NULL) { - /* This code may change if we can find a proper - * way to get at the PCI extended config space */ - pci_write_config_dword( - gh_device, IBS_LVT_OFFSET_PCI, - (vector | IBS_CTL_LVT_OFFSET_VALID_BIT)); - } - } - on_each_cpu(apic_init_ibs_nmi_per_cpu, NULL, 1, 1); + if (pfm_amd64_setup_eilvt()) + ibs_allowed = 0; } -- cgit v0.10.2 From 90645700ef8393cd9cdc02d83da36726fc88f49e Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 22 Jul 2008 21:08:58 +0200 Subject: OProfile: Fix build error in op_model_athlon.c Signed-off-by: Robert Richter Cc: oprofile-list Cc: Barry Kasindorf Signed-off-by: Ingo Molnar diff --git a/arch/x86/oprofile/op_model_athlon.c b/arch/x86/oprofile/op_model_athlon.c index 90193b1..284c456 100644 --- a/arch/x86/oprofile/op_model_athlon.c +++ b/arch/x86/oprofile/op_model_athlon.c @@ -73,6 +73,11 @@ /* MSR to set the IBS control register APIC LVT offset */ #define IBS_LVT_OFFSET_PCI 0x1CC +/* The function interface needs to be fixed, something like add + data. Should then be added to linux/oprofile.h. */ +extern void oprofile_add_ibs_sample(struct pt_regs *const regs, + unsigned int * const ibs_sample, u8 code); + struct ibs_fetch_sample { /* MSRC001_1031 IBS Fetch Linear Address Register */ unsigned int ibs_fetch_lin_addr_low; -- cgit v0.10.2 From ebb535de267386f659e0348185b1e361dbba3b59 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 22 Jul 2008 21:08:59 +0200 Subject: OProfile: on_each_cpu(): kill unused retry parameter Signed-off-by: Robert Richter Cc: oprofile-list Cc: Barry Kasindorf Signed-off-by: Ingo Molnar diff --git a/arch/x86/oprofile/op_model_athlon.c b/arch/x86/oprofile/op_model_athlon.c index 284c456..ce73236 100644 --- a/arch/x86/oprofile/op_model_athlon.c +++ b/arch/x86/oprofile/op_model_athlon.c @@ -382,7 +382,7 @@ static int pfm_amd64_setup_eilvt(void) u32 value = 0; /* per CPU setup */ - on_each_cpu(apic_init_ibs_nmi_per_cpu, NULL, 0, 1); + on_each_cpu(apic_init_ibs_nmi_per_cpu, NULL, 1); nodes = 0; cpu_cfg = NULL; @@ -443,7 +443,7 @@ static void setup_ibs(void) static void clear_ibs_nmi(void) { if (ibs_allowed) - on_each_cpu(apic_clear_ibs_nmi_per_cpu, NULL, 1, 1); + on_each_cpu(apic_clear_ibs_nmi_per_cpu, NULL, 1); } static void setup_ibs_files(struct super_block *sb, struct dentry *root) -- cgit v0.10.2 From fc2bd7345b4e006a34c2ea3711d8c6b83cba50f7 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 22 Jul 2008 21:09:00 +0200 Subject: OProfile: fix setup_ibs_files() function interface Signed-off-by: Robert Richter Cc: oprofile-list Cc: Barry Kasindorf Signed-off-by: Ingo Molnar diff --git a/arch/x86/oprofile/op_model_athlon.c b/arch/x86/oprofile/op_model_athlon.c index ce73236..2650b12 100644 --- a/arch/x86/oprofile/op_model_athlon.c +++ b/arch/x86/oprofile/op_model_athlon.c @@ -446,13 +446,13 @@ static void clear_ibs_nmi(void) on_each_cpu(apic_clear_ibs_nmi_per_cpu, NULL, 1); } -static void setup_ibs_files(struct super_block *sb, struct dentry *root) +static int setup_ibs_files(struct super_block * sb, struct dentry * root) { char buf[12]; struct dentry *dir; if (!ibs_allowed) - return; + return 0; /* setup some reasonable defaults */ ibs_config.max_cnt_fetch = 250000; @@ -476,6 +476,8 @@ static void setup_ibs_files(struct super_block *sb, struct dentry *root) &ibs_config.max_cnt_op); oprofilefs_create_ulong(sb, dir, "dispatched_ops", &ibs_config.dispatched_ops); + + return 0; } static int op_amd_init(struct oprofile_operations *ops) -- cgit v0.10.2 From 270d3e1a10e6ef85d5a085377e01d91dbcbe3726 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 22 Jul 2008 21:09:01 +0200 Subject: OProfile: enable IBS for AMD CPUs Signed-off-by: Robert Richter Cc: oprofile-list Cc: Barry Kasindorf Signed-off-by: Ingo Molnar diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index b298193..287513a 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -468,6 +468,14 @@ int __init op_nmi_init(struct oprofile_operations *ops) return -ENODEV; } + /* default values, can be overwritten by model */ + ops->create_files = nmi_create_files; + ops->setup = nmi_setup; + ops->shutdown = nmi_shutdown; + ops->start = nmi_start; + ops->stop = nmi_stop; + ops->cpu_type = cpu_type; + if (model->init) ret = model->init(ops); if (ret) @@ -475,12 +483,6 @@ int __init op_nmi_init(struct oprofile_operations *ops) init_sysfs(); using_nmi = 1; - ops->create_files = nmi_create_files; - ops->setup = nmi_setup; - ops->shutdown = nmi_shutdown; - ops->start = nmi_start; - ops->stop = nmi_stop; - ops->cpu_type = cpu_type; printk(KERN_INFO "oprofile: using NMI interrupt.\n"); return 0; } diff --git a/arch/x86/oprofile/op_model_athlon.c b/arch/x86/oprofile/op_model_athlon.c index 2650b12..0d83903 100644 --- a/arch/x86/oprofile/op_model_athlon.c +++ b/arch/x86/oprofile/op_model_athlon.c @@ -446,13 +446,25 @@ static void clear_ibs_nmi(void) on_each_cpu(apic_clear_ibs_nmi_per_cpu, NULL, 1); } +static int (*create_arch_files)(struct super_block * sb, struct dentry * root); + static int setup_ibs_files(struct super_block * sb, struct dentry * root) { char buf[12]; struct dentry *dir; + int ret = 0; + + /* architecture specific files */ + if (create_arch_files) + ret = create_arch_files(sb, root); + + if (ret) + return ret; if (!ibs_allowed) - return 0; + return ret; + + /* model specific files */ /* setup some reasonable defaults */ ibs_config.max_cnt_fetch = 250000; @@ -482,11 +494,15 @@ static int setup_ibs_files(struct super_block * sb, struct dentry * root) static int op_amd_init(struct oprofile_operations *ops) { + setup_ibs(); + create_arch_files = ops->create_files; + ops->create_files = setup_ibs_files; return 0; } static void op_amd_exit(void) { + clear_ibs_nmi(); } struct op_x86_model_spec const op_amd_spec = { -- cgit v0.10.2 From a4c408a41167949f820e2740e56a8f2f7bb6177c Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 22 Jul 2008 21:09:02 +0200 Subject: OProfile: fix IBS build error for UP Signed-off-by: Robert Richter Cc: oprofile-list Cc: Barry Kasindorf Signed-off-by: Ingo Molnar diff --git a/arch/x86/oprofile/op_model_athlon.c b/arch/x86/oprofile/op_model_athlon.c index 0d83903..1acb067 100644 --- a/arch/x86/oprofile/op_model_athlon.c +++ b/arch/x86/oprofile/op_model_athlon.c @@ -361,6 +361,26 @@ static void op_amd_shutdown(struct op_msrs const * const msrs) } } +#ifndef CONFIG_SMP + +/* no IBS support */ + +static void setup_ibs(void) +{ + ibs_allowed = 0; +} + +static void clear_ibs_nmi(void) {} + +static int op_amd_init(struct oprofile_operations *ops) +{ + return 0; +} + +static void op_amd_exit(void) {} + +#else + static u8 ibs_eilvt_off; static inline void apic_init_ibs_nmi_per_cpu(void *arg) @@ -505,6 +525,8 @@ static void op_amd_exit(void) clear_ibs_nmi(); } +#endif + struct op_x86_model_spec const op_amd_spec = { .init = op_amd_init, .exit = op_amd_exit, -- cgit v0.10.2 From 87f0baccc2e4f194c931186d3c8499314494a484 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 22 Jul 2008 21:09:03 +0200 Subject: x86/oprofile: macro definition cleanup in op_model_athlon.c Signed-off-by: Robert Richter Cc: oprofile-list Cc: Barry Kasindorf Signed-off-by: Ingo Molnar diff --git a/arch/x86/oprofile/op_model_athlon.c b/arch/x86/oprofile/op_model_athlon.c index 1acb067..a3a2058 100644 --- a/arch/x86/oprofile/op_model_athlon.c +++ b/arch/x86/oprofile/op_model_athlon.c @@ -47,32 +47,20 @@ #define CTRL_SET_HOST_ONLY(val, h) (val |= ((h & 1) << 9)) #define CTRL_SET_GUEST_ONLY(val, h) (val |= ((h & 1) << 8)) -#define IBS_FETCH_CTL_HIGH_MASK 0xFFFFFFFF -/* high dword bit IbsFetchCtl[bit 49] */ -#define IBS_FETCH_VALID_BIT (1UL << 17) -/* high dword bit IbsFetchCtl[bit 52] */ -#define IBS_FETCH_PHY_ADDR_VALID_BIT (1UL << 20) -/* high dword bit IbsFetchCtl[bit 48] */ -#define IBS_FETCH_ENABLE (1UL << 16) +/* IbsFetchCtl bits/masks */ +#define IBS_FETCH_HIGH_VALID_BIT (1UL << 17) /* bit 49 */ +#define IBS_FETCH_HIGH_ENABLE (1UL << 16) /* bit 48 */ +#define IBS_FETCH_LOW_MAX_CNT_MASK 0x0000FFFFUL /* MaxCnt mask */ -#define IBS_FETCH_CTL_CNT_MASK 0x00000000FFFF0000UL -#define IBS_FETCH_CTL_MAX_CNT_MASK 0x000000000000FFFFUL - -/*IbsOpCtl masks/bits */ -#define IBS_OP_VALID_BIT (1ULL<<18) /* IbsOpCtl[bit18] */ -#define IBS_OP_ENABLE (1ULL<<17) /* IBS_OP_ENABLE[bit17]*/ +/*IbsOpCtl bits */ +#define IBS_OP_LOW_VALID_BIT (1ULL<<18) /* bit 18 */ +#define IBS_OP_LOW_ENABLE (1ULL<<17) /* bit 17 */ /* Codes used in cpu_buffer.c */ +/* This produces duplicate code, need to be fixed */ #define IBS_FETCH_BEGIN 3 #define IBS_OP_BEGIN 4 -/*IbsOpData3 masks */ -#define IBS_CTL_LVT_OFFSET_VALID_BIT (1ULL<<8) - -/*PCI Extended Configuration Constants */ -/* MSR to set the IBS control register APIC LVT offset */ -#define IBS_LVT_OFFSET_PCI 0x1CC - /* The function interface needs to be fixed, something like add data. Should then be added to linux/oprofile.h. */ extern void oprofile_add_ibs_sample(struct pt_regs *const regs, @@ -213,7 +201,7 @@ op_amd_handle_ibs(struct pt_regs * const regs, if (ibs_config.fetch_enabled) { rdmsr(MSR_AMD64_IBSFETCHCTL, low, high); - if (high & IBS_FETCH_VALID_BIT) { + if (high & IBS_FETCH_HIGH_VALID_BIT) { ibs_fetch.ibs_fetch_ctl_high = high; ibs_fetch.ibs_fetch_ctl_low = low; rdmsr(MSR_AMD64_IBSFETCHLINAD, low, high); @@ -229,16 +217,16 @@ op_amd_handle_ibs(struct pt_regs * const regs, /*reenable the IRQ */ rdmsr(MSR_AMD64_IBSFETCHCTL, low, high); - high &= ~(IBS_FETCH_VALID_BIT); - high |= IBS_FETCH_ENABLE; - low &= IBS_FETCH_CTL_MAX_CNT_MASK; + high &= ~IBS_FETCH_HIGH_VALID_BIT; + high |= IBS_FETCH_HIGH_ENABLE; + low &= IBS_FETCH_LOW_MAX_CNT_MASK; wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); } } if (ibs_config.op_enabled) { rdmsr(MSR_AMD64_IBSOPCTL, low, high); - if (low & IBS_OP_VALID_BIT) { + if (low & IBS_OP_LOW_VALID_BIT) { rdmsr(MSR_AMD64_IBSOPRIP, low, high); ibs_op.ibs_op_rip_low = low; ibs_op.ibs_op_rip_high = high; @@ -263,8 +251,8 @@ op_amd_handle_ibs(struct pt_regs * const regs, (unsigned int *)&ibs_op, IBS_OP_BEGIN); rdmsr(MSR_AMD64_IBSOPCTL, low, high); - low &= ~(IBS_OP_VALID_BIT); - low |= IBS_OP_ENABLE; + low &= ~IBS_OP_LOW_VALID_BIT; + low |= IBS_OP_LOW_ENABLE; wrmsr(MSR_AMD64_IBSOPCTL, low, high); } } @@ -307,12 +295,12 @@ static void op_amd_start(struct op_msrs const * const msrs) } if (ibs_allowed && ibs_config.fetch_enabled) { low = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF; - high = IBS_FETCH_ENABLE; + high = IBS_FETCH_HIGH_ENABLE; wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); } if (ibs_allowed && ibs_config.op_enabled) { - low = ((ibs_config.max_cnt_op >> 4) & 0xFFFF) + IBS_OP_ENABLE; + low = ((ibs_config.max_cnt_op >> 4) & 0xFFFF) + IBS_OP_LOW_ENABLE; high = 0; wrmsr(MSR_AMD64_IBSOPCTL, low, high); } -- cgit v0.10.2 From 543a157bbdfae8eb997506031c3b2d4d17957098 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 22 Jul 2008 21:09:04 +0200 Subject: x86/oprofile: op_model_athlon.c: fix counter reset when reenabling IBS OP Signed-off-by: Robert Richter Cc: oprofile-list Cc: Barry Kasindorf Signed-off-by: Ingo Molnar diff --git a/arch/x86/oprofile/op_model_athlon.c b/arch/x86/oprofile/op_model_athlon.c index a3a2058..9c8c8c5 100644 --- a/arch/x86/oprofile/op_model_athlon.c +++ b/arch/x86/oprofile/op_model_athlon.c @@ -251,6 +251,7 @@ op_amd_handle_ibs(struct pt_regs * const regs, (unsigned int *)&ibs_op, IBS_OP_BEGIN); rdmsr(MSR_AMD64_IBSOPCTL, low, high); + high = 0; low &= ~IBS_OP_LOW_VALID_BIT; low |= IBS_OP_LOW_ENABLE; wrmsr(MSR_AMD64_IBSOPCTL, low, high); -- cgit v0.10.2 From 09691616850b3614dfb44790e1e1419b6a7f5d13 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 22 Jul 2008 21:09:05 +0200 Subject: x86: apic: export symbols for extended interrupt LVT functions This patch adds EXPORT_SYMBOLs to allow OProfile to be built as module. Cc: Arjan van de Ven Signed-off-by: Robert Richter Cc: oprofile-list Cc: Arjan van de Ven Cc: Barry Kasindorf Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index fad94b0..ed3a6d7 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -672,6 +672,7 @@ u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask) setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask); return APIC_EILVT_LVTOFF_IBS; } +EXPORT_SYMBOL(setup_APIC_eilvt_ibs); /* * Local APIC start and shutdown diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 42bf69f..45ec90e 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -232,6 +232,7 @@ u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask) setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask); return APIC_EILVT_LVTOFF_IBS; } +EXPORT_SYMBOL(setup_APIC_eilvt_ibs); /* * Program the next event, relative to now -- cgit v0.10.2 From 6aa360e6c16c145edf1837690e0f7aaea6b86ef3 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 23 Jul 2008 15:28:14 +0200 Subject: x86: apic: changing export symbols to *_GPL This fits better here. Signed-off-by: Robert Richter Cc: Arjan van de Ven Cc: Barry Kasindorf Cc: oprofile-list Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index ed3a6d7..0059e7a 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -672,7 +672,7 @@ u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask) setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask); return APIC_EILVT_LVTOFF_IBS; } -EXPORT_SYMBOL(setup_APIC_eilvt_ibs); +EXPORT_SYMBOL_GPL(setup_APIC_eilvt_ibs); /* * Local APIC start and shutdown diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 45ec90e..e571351 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -232,7 +232,7 @@ u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask) setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask); return APIC_EILVT_LVTOFF_IBS; } -EXPORT_SYMBOL(setup_APIC_eilvt_ibs); +EXPORT_SYMBOL_GPL(setup_APIC_eilvt_ibs); /* * Program the next event, relative to now -- cgit v0.10.2 From 852402cc27bfa1200164e9e8dc7f6e5f0a4fbd46 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 22 Jul 2008 21:09:06 +0200 Subject: x86/oprofile: add CONFIG_OPROFILE_IBS option Signed-off-by: Robert Richter Cc: oprofile-list Cc: Robert Richter Cc: Barry Kasindorf Signed-off-by: Ingo Molnar diff --git a/arch/Kconfig b/arch/Kconfig index b0fabfa..2651af4 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -13,6 +13,20 @@ config OPROFILE If unsure, say N. +config OPROFILE_IBS + bool "OProfile AMD IBS support (EXPERIMENTAL)" + default n + depends on OPROFILE && SMP && X86 + help + Instruction-Based Sampling (IBS) is a new profiling + technique that provides rich, precise program performance + information. IBS is introduced by AMD Family10h processors + (AMD Opteron Quad-Core processor “Barcelonaâ€) to overcome + the limitations of conventional performance counter + sampling. + + If unsure, say N. + config HAVE_OPROFILE def_bool n diff --git a/arch/x86/oprofile/op_model_athlon.c b/arch/x86/oprofile/op_model_athlon.c index 9c8c8c5..fb6015c 100644 --- a/arch/x86/oprofile/op_model_athlon.c +++ b/arch/x86/oprofile/op_model_athlon.c @@ -47,6 +47,10 @@ #define CTRL_SET_HOST_ONLY(val, h) (val |= ((h & 1) << 9)) #define CTRL_SET_GUEST_ONLY(val, h) (val |= ((h & 1) << 8)) +static unsigned long reset_value[NUM_COUNTERS]; + +#ifdef CONFIG_OPROFILE_IBS + /* IbsFetchCtl bits/masks */ #define IBS_FETCH_HIGH_VALID_BIT (1UL << 17) /* bit 49 */ #define IBS_FETCH_HIGH_ENABLE (1UL << 16) /* bit 48 */ @@ -104,7 +108,6 @@ struct ibs_op_sample { */ static void clear_ibs_nmi(void); -static unsigned long reset_value[NUM_COUNTERS]; static int ibs_allowed; /* AMD Family10h and later */ struct op_ibs_config { @@ -118,6 +121,8 @@ struct op_ibs_config { static struct op_ibs_config ibs_config; +#endif + /* functions for op_amd_spec */ static void op_amd_fill_in_addresses(struct op_msrs * const msrs) @@ -188,6 +193,8 @@ static void op_amd_setup_ctrs(struct op_msrs const * const msrs) } } +#ifdef CONFIG_OPROFILE_IBS + static inline int op_amd_handle_ibs(struct pt_regs * const regs, struct op_msrs const * const msrs) @@ -261,6 +268,8 @@ op_amd_handle_ibs(struct pt_regs * const regs, return 1; } +#endif + static int op_amd_check_ctrs(struct pt_regs * const regs, struct op_msrs const * const msrs) { @@ -277,7 +286,9 @@ static int op_amd_check_ctrs(struct pt_regs * const regs, } } +#ifdef CONFIG_OPROFILE_IBS op_amd_handle_ibs(regs, msrs); +#endif /* See op_model_ppro.c */ return 1; @@ -294,6 +305,8 @@ static void op_amd_start(struct op_msrs const * const msrs) CTRL_WRITE(low, high, msrs, i); } } + +#ifdef CONFIG_OPROFILE_IBS if (ibs_allowed && ibs_config.fetch_enabled) { low = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF; high = IBS_FETCH_HIGH_ENABLE; @@ -305,6 +318,7 @@ static void op_amd_start(struct op_msrs const * const msrs) high = 0; wrmsr(MSR_AMD64_IBSOPCTL, low, high); } +#endif } @@ -323,6 +337,7 @@ static void op_amd_stop(struct op_msrs const * const msrs) CTRL_WRITE(low, high, msrs, i); } +#ifdef CONFIG_OPROFILE_IBS if (ibs_allowed && ibs_config.fetch_enabled) { low = 0; /* clear max count and enable */ high = 0; @@ -334,6 +349,7 @@ static void op_amd_stop(struct op_msrs const * const msrs) high = 0; wrmsr(MSR_AMD64_IBSOPCTL, low, high); } +#endif } static void op_amd_shutdown(struct op_msrs const * const msrs) @@ -350,17 +366,10 @@ static void op_amd_shutdown(struct op_msrs const * const msrs) } } -#ifndef CONFIG_SMP +#ifndef CONFIG_OPROFILE_IBS /* no IBS support */ -static void setup_ibs(void) -{ - ibs_allowed = 0; -} - -static void clear_ibs_nmi(void) {} - static int op_amd_init(struct oprofile_operations *ops) { return 0; @@ -441,8 +450,12 @@ static void setup_ibs(void) if (!ibs_allowed) return; - if (pfm_amd64_setup_eilvt()) + if (pfm_amd64_setup_eilvt()) { ibs_allowed = 0; + return; + } + + printk(KERN_INFO "oprofile: AMD IBS detected\n"); } diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c index e1782d2..ed98227 100644 --- a/drivers/oprofile/buffer_sync.c +++ b/drivers/oprofile/buffer_sync.c @@ -328,6 +328,8 @@ static void add_trace_begin(void) add_event_entry(TRACE_BEGIN_CODE); } +#ifdef CONFIG_OPROFILE_IBS + #define IBS_FETCH_CODE_SIZE 2 #define IBS_OP_CODE_SIZE 5 #define IBS_EIP(offset) \ @@ -390,6 +392,8 @@ static void add_ibs_begin(struct oprofile_cpu_buffer *cpu_buf, int code, } } +#endif + static void add_sample_entry(unsigned long offset, unsigned long event) { add_event_entry(offset); @@ -586,6 +590,7 @@ void sync_buffer(int cpu) } else if (s->event == CPU_TRACE_BEGIN) { state = sb_bt_start; add_trace_begin(); +#ifdef CONFIG_OPROFILE_IBS } else if (s->event == IBS_FETCH_BEGIN) { state = sb_bt_start; add_ibs_begin(cpu_buf, @@ -594,6 +599,7 @@ void sync_buffer(int cpu) state = sb_bt_start; add_ibs_begin(cpu_buf, IBS_OP_CODE, in_kernel, mm); +#endif } else { struct mm_struct *oldmm = mm; diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c index c9ac4e1..aba905b 100644 --- a/drivers/oprofile/cpu_buffer.c +++ b/drivers/oprofile/cpu_buffer.c @@ -253,6 +253,8 @@ void oprofile_add_sample(struct pt_regs * const regs, unsigned long event) oprofile_add_ext_sample(pc, regs, event, is_kernel); } +#ifdef CONFIG_OPROFILE_IBS + #define MAX_IBS_SAMPLE_SIZE 14 static int log_ibs_sample(struct oprofile_cpu_buffer *cpu_buf, unsigned long pc, int is_kernel, unsigned int *ibs, int ibs_code) @@ -318,6 +320,8 @@ void oprofile_add_ibs_sample(struct pt_regs *const regs, oprofile_ops.backtrace(regs, backtrace_depth); } +#endif + void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event) { struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer); -- cgit v0.10.2 From bd17b625c09d1ed14c4d98604186b0bbb314f6b2 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 22 Jul 2008 21:09:07 +0200 Subject: oprofile: fix printk in cpu_buffer.c Signed-off-by: Robert Richter Cc: oprofile-list Cc: Robert Richter Cc: Barry Kasindorf Signed-off-by: Ingo Molnar diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c index aba905b..4decab6 100644 --- a/drivers/oprofile/cpu_buffer.c +++ b/drivers/oprofile/cpu_buffer.c @@ -364,7 +364,7 @@ static void wq_sync_buffer(struct work_struct *work) struct oprofile_cpu_buffer * b = container_of(work, struct oprofile_cpu_buffer, work.work); if (b->cpu != smp_processor_id()) { - printk("WQ on CPU%d, prefer CPU%d\n", + printk(KERN_DEBUG "WQ on CPU%d, prefer CPU%d\n", smp_processor_id(), b->cpu); } sync_buffer(b->cpu); -- cgit v0.10.2 From 6852fd9b86d05063c6ef49d2e12e061cc7f6a105 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 22 Jul 2008 21:09:08 +0200 Subject: x86/oprofile: reanaming op_model_athlon.c to op_model_amd.c Signed-off-by: Robert Richter Cc: oprofile-list Cc: Barry Kasindorf Signed-off-by: Ingo Molnar diff --git a/arch/x86/oprofile/Makefile b/arch/x86/oprofile/Makefile index 30f3eb3..446902b 100644 --- a/arch/x86/oprofile/Makefile +++ b/arch/x86/oprofile/Makefile @@ -7,6 +7,6 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \ timer_int.o ) oprofile-y := $(DRIVER_OBJS) init.o backtrace.o -oprofile-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_athlon.o \ +oprofile-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_amd.o \ op_model_ppro.o op_model_p4.o oprofile-$(CONFIG_X86_IO_APIC) += nmi_timer_int.o diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c new file mode 100644 index 0000000..d9faf60 --- /dev/null +++ b/arch/x86/oprofile/op_model_amd.c @@ -0,0 +1,543 @@ +/* + * @file op_model_amd.c + * athlon / K7 / K8 / Family 10h model-specific MSR operations + * + * @remark Copyright 2002-2008 OProfile authors + * @remark Read the file COPYING + * + * @author John Levon + * @author Philippe Elie + * @author Graydon Hoare + * @author Robert Richter + * @author Barry Kasindorf +*/ + +#include +#include +#include + +#include +#include +#include + +#include "op_x86_model.h" +#include "op_counter.h" + +#define NUM_COUNTERS 4 +#define NUM_CONTROLS 4 + +#define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0) +#define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0) +#define CTR_WRITE(l, msrs, c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1); } while (0) +#define CTR_OVERFLOWED(n) (!((n) & (1U<<31))) + +#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0) +#define CTRL_READ(l, h, msrs, c) do {rdmsr(msrs->controls[(c)].addr, (l), (h)); } while (0) +#define CTRL_WRITE(l, h, msrs, c) do {wrmsr(msrs->controls[(c)].addr, (l), (h)); } while (0) +#define CTRL_SET_ACTIVE(n) (n |= (1<<22)) +#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22)) +#define CTRL_CLEAR_LO(x) (x &= (1<<21)) +#define CTRL_CLEAR_HI(x) (x &= 0xfffffcf0) +#define CTRL_SET_ENABLE(val) (val |= 1<<20) +#define CTRL_SET_USR(val, u) (val |= ((u & 1) << 16)) +#define CTRL_SET_KERN(val, k) (val |= ((k & 1) << 17)) +#define CTRL_SET_UM(val, m) (val |= (m << 8)) +#define CTRL_SET_EVENT_LOW(val, e) (val |= (e & 0xff)) +#define CTRL_SET_EVENT_HIGH(val, e) (val |= ((e >> 8) & 0xf)) +#define CTRL_SET_HOST_ONLY(val, h) (val |= ((h & 1) << 9)) +#define CTRL_SET_GUEST_ONLY(val, h) (val |= ((h & 1) << 8)) + +static unsigned long reset_value[NUM_COUNTERS]; + +#ifdef CONFIG_OPROFILE_IBS + +/* IbsFetchCtl bits/masks */ +#define IBS_FETCH_HIGH_VALID_BIT (1UL << 17) /* bit 49 */ +#define IBS_FETCH_HIGH_ENABLE (1UL << 16) /* bit 48 */ +#define IBS_FETCH_LOW_MAX_CNT_MASK 0x0000FFFFUL /* MaxCnt mask */ + +/*IbsOpCtl bits */ +#define IBS_OP_LOW_VALID_BIT (1ULL<<18) /* bit 18 */ +#define IBS_OP_LOW_ENABLE (1ULL<<17) /* bit 17 */ + +/* Codes used in cpu_buffer.c */ +/* This produces duplicate code, need to be fixed */ +#define IBS_FETCH_BEGIN 3 +#define IBS_OP_BEGIN 4 + +/* The function interface needs to be fixed, something like add + data. Should then be added to linux/oprofile.h. */ +extern void oprofile_add_ibs_sample(struct pt_regs *const regs, + unsigned int * const ibs_sample, u8 code); + +struct ibs_fetch_sample { + /* MSRC001_1031 IBS Fetch Linear Address Register */ + unsigned int ibs_fetch_lin_addr_low; + unsigned int ibs_fetch_lin_addr_high; + /* MSRC001_1030 IBS Fetch Control Register */ + unsigned int ibs_fetch_ctl_low; + unsigned int ibs_fetch_ctl_high; + /* MSRC001_1032 IBS Fetch Physical Address Register */ + unsigned int ibs_fetch_phys_addr_low; + unsigned int ibs_fetch_phys_addr_high; +}; + +struct ibs_op_sample { + /* MSRC001_1034 IBS Op Logical Address Register (IbsRIP) */ + unsigned int ibs_op_rip_low; + unsigned int ibs_op_rip_high; + /* MSRC001_1035 IBS Op Data Register */ + unsigned int ibs_op_data1_low; + unsigned int ibs_op_data1_high; + /* MSRC001_1036 IBS Op Data 2 Register */ + unsigned int ibs_op_data2_low; + unsigned int ibs_op_data2_high; + /* MSRC001_1037 IBS Op Data 3 Register */ + unsigned int ibs_op_data3_low; + unsigned int ibs_op_data3_high; + /* MSRC001_1038 IBS DC Linear Address Register (IbsDcLinAd) */ + unsigned int ibs_dc_linear_low; + unsigned int ibs_dc_linear_high; + /* MSRC001_1039 IBS DC Physical Address Register (IbsDcPhysAd) */ + unsigned int ibs_dc_phys_low; + unsigned int ibs_dc_phys_high; +}; + +/* + * unitialize the APIC for the IBS interrupts if needed on AMD Family10h+ +*/ +static void clear_ibs_nmi(void); + +static int ibs_allowed; /* AMD Family10h and later */ + +struct op_ibs_config { + unsigned long op_enabled; + unsigned long fetch_enabled; + unsigned long max_cnt_fetch; + unsigned long max_cnt_op; + unsigned long rand_en; + unsigned long dispatched_ops; +}; + +static struct op_ibs_config ibs_config; + +#endif + +/* functions for op_amd_spec */ + +static void op_amd_fill_in_addresses(struct op_msrs * const msrs) +{ + int i; + + for (i = 0; i < NUM_COUNTERS; i++) { + if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) + msrs->counters[i].addr = MSR_K7_PERFCTR0 + i; + else + msrs->counters[i].addr = 0; + } + + for (i = 0; i < NUM_CONTROLS; i++) { + if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) + msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i; + else + msrs->controls[i].addr = 0; + } +} + + +static void op_amd_setup_ctrs(struct op_msrs const * const msrs) +{ + unsigned int low, high; + int i; + + /* clear all counters */ + for (i = 0 ; i < NUM_CONTROLS; ++i) { + if (unlikely(!CTRL_IS_RESERVED(msrs, i))) + continue; + CTRL_READ(low, high, msrs, i); + CTRL_CLEAR_LO(low); + CTRL_CLEAR_HI(high); + CTRL_WRITE(low, high, msrs, i); + } + + /* avoid a false detection of ctr overflows in NMI handler */ + for (i = 0; i < NUM_COUNTERS; ++i) { + if (unlikely(!CTR_IS_RESERVED(msrs, i))) + continue; + CTR_WRITE(1, msrs, i); + } + + /* enable active counters */ + for (i = 0; i < NUM_COUNTERS; ++i) { + if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) { + reset_value[i] = counter_config[i].count; + + CTR_WRITE(counter_config[i].count, msrs, i); + + CTRL_READ(low, high, msrs, i); + CTRL_CLEAR_LO(low); + CTRL_CLEAR_HI(high); + CTRL_SET_ENABLE(low); + CTRL_SET_USR(low, counter_config[i].user); + CTRL_SET_KERN(low, counter_config[i].kernel); + CTRL_SET_UM(low, counter_config[i].unit_mask); + CTRL_SET_EVENT_LOW(low, counter_config[i].event); + CTRL_SET_EVENT_HIGH(high, counter_config[i].event); + CTRL_SET_HOST_ONLY(high, 0); + CTRL_SET_GUEST_ONLY(high, 0); + + CTRL_WRITE(low, high, msrs, i); + } else { + reset_value[i] = 0; + } + } +} + +#ifdef CONFIG_OPROFILE_IBS + +static inline int +op_amd_handle_ibs(struct pt_regs * const regs, + struct op_msrs const * const msrs) +{ + unsigned int low, high; + struct ibs_fetch_sample ibs_fetch; + struct ibs_op_sample ibs_op; + + if (!ibs_allowed) + return 1; + + if (ibs_config.fetch_enabled) { + rdmsr(MSR_AMD64_IBSFETCHCTL, low, high); + if (high & IBS_FETCH_HIGH_VALID_BIT) { + ibs_fetch.ibs_fetch_ctl_high = high; + ibs_fetch.ibs_fetch_ctl_low = low; + rdmsr(MSR_AMD64_IBSFETCHLINAD, low, high); + ibs_fetch.ibs_fetch_lin_addr_high = high; + ibs_fetch.ibs_fetch_lin_addr_low = low; + rdmsr(MSR_AMD64_IBSFETCHPHYSAD, low, high); + ibs_fetch.ibs_fetch_phys_addr_high = high; + ibs_fetch.ibs_fetch_phys_addr_low = low; + + oprofile_add_ibs_sample(regs, + (unsigned int *)&ibs_fetch, + IBS_FETCH_BEGIN); + + /*reenable the IRQ */ + rdmsr(MSR_AMD64_IBSFETCHCTL, low, high); + high &= ~IBS_FETCH_HIGH_VALID_BIT; + high |= IBS_FETCH_HIGH_ENABLE; + low &= IBS_FETCH_LOW_MAX_CNT_MASK; + wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); + } + } + + if (ibs_config.op_enabled) { + rdmsr(MSR_AMD64_IBSOPCTL, low, high); + if (low & IBS_OP_LOW_VALID_BIT) { + rdmsr(MSR_AMD64_IBSOPRIP, low, high); + ibs_op.ibs_op_rip_low = low; + ibs_op.ibs_op_rip_high = high; + rdmsr(MSR_AMD64_IBSOPDATA, low, high); + ibs_op.ibs_op_data1_low = low; + ibs_op.ibs_op_data1_high = high; + rdmsr(MSR_AMD64_IBSOPDATA2, low, high); + ibs_op.ibs_op_data2_low = low; + ibs_op.ibs_op_data2_high = high; + rdmsr(MSR_AMD64_IBSOPDATA3, low, high); + ibs_op.ibs_op_data3_low = low; + ibs_op.ibs_op_data3_high = high; + rdmsr(MSR_AMD64_IBSDCLINAD, low, high); + ibs_op.ibs_dc_linear_low = low; + ibs_op.ibs_dc_linear_high = high; + rdmsr(MSR_AMD64_IBSDCPHYSAD, low, high); + ibs_op.ibs_dc_phys_low = low; + ibs_op.ibs_dc_phys_high = high; + + /* reenable the IRQ */ + oprofile_add_ibs_sample(regs, + (unsigned int *)&ibs_op, + IBS_OP_BEGIN); + rdmsr(MSR_AMD64_IBSOPCTL, low, high); + high = 0; + low &= ~IBS_OP_LOW_VALID_BIT; + low |= IBS_OP_LOW_ENABLE; + wrmsr(MSR_AMD64_IBSOPCTL, low, high); + } + } + + return 1; +} + +#endif + +static int op_amd_check_ctrs(struct pt_regs * const regs, + struct op_msrs const * const msrs) +{ + unsigned int low, high; + int i; + + for (i = 0 ; i < NUM_COUNTERS; ++i) { + if (!reset_value[i]) + continue; + CTR_READ(low, high, msrs, i); + if (CTR_OVERFLOWED(low)) { + oprofile_add_sample(regs, i); + CTR_WRITE(reset_value[i], msrs, i); + } + } + +#ifdef CONFIG_OPROFILE_IBS + op_amd_handle_ibs(regs, msrs); +#endif + + /* See op_model_ppro.c */ + return 1; +} + +static void op_amd_start(struct op_msrs const * const msrs) +{ + unsigned int low, high; + int i; + for (i = 0 ; i < NUM_COUNTERS ; ++i) { + if (reset_value[i]) { + CTRL_READ(low, high, msrs, i); + CTRL_SET_ACTIVE(low); + CTRL_WRITE(low, high, msrs, i); + } + } + +#ifdef CONFIG_OPROFILE_IBS + if (ibs_allowed && ibs_config.fetch_enabled) { + low = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF; + high = IBS_FETCH_HIGH_ENABLE; + wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); + } + + if (ibs_allowed && ibs_config.op_enabled) { + low = ((ibs_config.max_cnt_op >> 4) & 0xFFFF) + IBS_OP_LOW_ENABLE; + high = 0; + wrmsr(MSR_AMD64_IBSOPCTL, low, high); + } +#endif +} + + +static void op_amd_stop(struct op_msrs const * const msrs) +{ + unsigned int low, high; + int i; + + /* Subtle: stop on all counters to avoid race with + * setting our pm callback */ + for (i = 0 ; i < NUM_COUNTERS ; ++i) { + if (!reset_value[i]) + continue; + CTRL_READ(low, high, msrs, i); + CTRL_SET_INACTIVE(low); + CTRL_WRITE(low, high, msrs, i); + } + +#ifdef CONFIG_OPROFILE_IBS + if (ibs_allowed && ibs_config.fetch_enabled) { + low = 0; /* clear max count and enable */ + high = 0; + wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); + } + + if (ibs_allowed && ibs_config.op_enabled) { + low = 0; /* clear max count and enable */ + high = 0; + wrmsr(MSR_AMD64_IBSOPCTL, low, high); + } +#endif +} + +static void op_amd_shutdown(struct op_msrs const * const msrs) +{ + int i; + + for (i = 0 ; i < NUM_COUNTERS ; ++i) { + if (CTR_IS_RESERVED(msrs, i)) + release_perfctr_nmi(MSR_K7_PERFCTR0 + i); + } + for (i = 0 ; i < NUM_CONTROLS ; ++i) { + if (CTRL_IS_RESERVED(msrs, i)) + release_evntsel_nmi(MSR_K7_EVNTSEL0 + i); + } +} + +#ifndef CONFIG_OPROFILE_IBS + +/* no IBS support */ + +static int op_amd_init(struct oprofile_operations *ops) +{ + return 0; +} + +static void op_amd_exit(void) {} + +#else + +static u8 ibs_eilvt_off; + +static inline void apic_init_ibs_nmi_per_cpu(void *arg) +{ + ibs_eilvt_off = setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_NMI, 0); +} + +static inline void apic_clear_ibs_nmi_per_cpu(void *arg) +{ + setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_FIX, 1); +} + +static int pfm_amd64_setup_eilvt(void) +{ +#define IBSCTL_LVTOFFSETVAL (1 << 8) +#define IBSCTL 0x1cc + struct pci_dev *cpu_cfg; + int nodes; + u32 value = 0; + + /* per CPU setup */ + on_each_cpu(apic_init_ibs_nmi_per_cpu, NULL, 1); + + nodes = 0; + cpu_cfg = NULL; + do { + cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD, + PCI_DEVICE_ID_AMD_10H_NB_MISC, + cpu_cfg); + if (!cpu_cfg) + break; + ++nodes; + pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off + | IBSCTL_LVTOFFSETVAL); + pci_read_config_dword(cpu_cfg, IBSCTL, &value); + if (value != (ibs_eilvt_off | IBSCTL_LVTOFFSETVAL)) { + printk(KERN_DEBUG "Failed to setup IBS LVT offset, " + "IBSCTL = 0x%08x", value); + return 1; + } + } while (1); + + if (!nodes) { + printk(KERN_DEBUG "No CPU node configured for IBS"); + return 1; + } + +#ifdef CONFIG_NUMA + /* Sanity check */ + /* Works only for 64bit with proper numa implementation. */ + if (nodes != num_possible_nodes()) { + printk(KERN_DEBUG "Failed to setup CPU node(s) for IBS, " + "found: %d, expected %d", + nodes, num_possible_nodes()); + return 1; + } +#endif + return 0; +} + +/* + * initialize the APIC for the IBS interrupts + * if available (AMD Family10h rev B0 and later) + */ +static void setup_ibs(void) +{ + ibs_allowed = boot_cpu_has(X86_FEATURE_IBS); + + if (!ibs_allowed) + return; + + if (pfm_amd64_setup_eilvt()) { + ibs_allowed = 0; + return; + } + + printk(KERN_INFO "oprofile: AMD IBS detected\n"); +} + + +/* + * unitialize the APIC for the IBS interrupts if needed on AMD Family10h + * rev B0 and later */ +static void clear_ibs_nmi(void) +{ + if (ibs_allowed) + on_each_cpu(apic_clear_ibs_nmi_per_cpu, NULL, 1); +} + +static int (*create_arch_files)(struct super_block * sb, struct dentry * root); + +static int setup_ibs_files(struct super_block * sb, struct dentry * root) +{ + char buf[12]; + struct dentry *dir; + int ret = 0; + + /* architecture specific files */ + if (create_arch_files) + ret = create_arch_files(sb, root); + + if (ret) + return ret; + + if (!ibs_allowed) + return ret; + + /* model specific files */ + + /* setup some reasonable defaults */ + ibs_config.max_cnt_fetch = 250000; + ibs_config.fetch_enabled = 0; + ibs_config.max_cnt_op = 250000; + ibs_config.op_enabled = 0; + ibs_config.dispatched_ops = 1; + snprintf(buf, sizeof(buf), "ibs_fetch"); + dir = oprofilefs_mkdir(sb, root, buf); + oprofilefs_create_ulong(sb, dir, "rand_enable", + &ibs_config.rand_en); + oprofilefs_create_ulong(sb, dir, "enable", + &ibs_config.fetch_enabled); + oprofilefs_create_ulong(sb, dir, "max_count", + &ibs_config.max_cnt_fetch); + snprintf(buf, sizeof(buf), "ibs_uops"); + dir = oprofilefs_mkdir(sb, root, buf); + oprofilefs_create_ulong(sb, dir, "enable", + &ibs_config.op_enabled); + oprofilefs_create_ulong(sb, dir, "max_count", + &ibs_config.max_cnt_op); + oprofilefs_create_ulong(sb, dir, "dispatched_ops", + &ibs_config.dispatched_ops); + + return 0; +} + +static int op_amd_init(struct oprofile_operations *ops) +{ + setup_ibs(); + create_arch_files = ops->create_files; + ops->create_files = setup_ibs_files; + return 0; +} + +static void op_amd_exit(void) +{ + clear_ibs_nmi(); +} + +#endif + +struct op_x86_model_spec const op_amd_spec = { + .init = op_amd_init, + .exit = op_amd_exit, + .num_counters = NUM_COUNTERS, + .num_controls = NUM_CONTROLS, + .fill_in_addresses = &op_amd_fill_in_addresses, + .setup_ctrs = &op_amd_setup_ctrs, + .check_ctrs = &op_amd_check_ctrs, + .start = &op_amd_start, + .stop = &op_amd_stop, + .shutdown = &op_amd_shutdown +}; diff --git a/arch/x86/oprofile/op_model_athlon.c b/arch/x86/oprofile/op_model_athlon.c deleted file mode 100644 index fb6015c..0000000 --- a/arch/x86/oprofile/op_model_athlon.c +++ /dev/null @@ -1,543 +0,0 @@ -/* - * @file op_model_athlon.c - * athlon / K7 / K8 / Family 10h model-specific MSR operations - * - * @remark Copyright 2002-2008 OProfile authors - * @remark Read the file COPYING - * - * @author John Levon - * @author Philippe Elie - * @author Graydon Hoare - * @author Robert Richter - * @author Barry Kasindorf -*/ - -#include -#include -#include - -#include -#include -#include - -#include "op_x86_model.h" -#include "op_counter.h" - -#define NUM_COUNTERS 4 -#define NUM_CONTROLS 4 - -#define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0) -#define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0) -#define CTR_WRITE(l, msrs, c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1); } while (0) -#define CTR_OVERFLOWED(n) (!((n) & (1U<<31))) - -#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0) -#define CTRL_READ(l, h, msrs, c) do {rdmsr(msrs->controls[(c)].addr, (l), (h)); } while (0) -#define CTRL_WRITE(l, h, msrs, c) do {wrmsr(msrs->controls[(c)].addr, (l), (h)); } while (0) -#define CTRL_SET_ACTIVE(n) (n |= (1<<22)) -#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22)) -#define CTRL_CLEAR_LO(x) (x &= (1<<21)) -#define CTRL_CLEAR_HI(x) (x &= 0xfffffcf0) -#define CTRL_SET_ENABLE(val) (val |= 1<<20) -#define CTRL_SET_USR(val, u) (val |= ((u & 1) << 16)) -#define CTRL_SET_KERN(val, k) (val |= ((k & 1) << 17)) -#define CTRL_SET_UM(val, m) (val |= (m << 8)) -#define CTRL_SET_EVENT_LOW(val, e) (val |= (e & 0xff)) -#define CTRL_SET_EVENT_HIGH(val, e) (val |= ((e >> 8) & 0xf)) -#define CTRL_SET_HOST_ONLY(val, h) (val |= ((h & 1) << 9)) -#define CTRL_SET_GUEST_ONLY(val, h) (val |= ((h & 1) << 8)) - -static unsigned long reset_value[NUM_COUNTERS]; - -#ifdef CONFIG_OPROFILE_IBS - -/* IbsFetchCtl bits/masks */ -#define IBS_FETCH_HIGH_VALID_BIT (1UL << 17) /* bit 49 */ -#define IBS_FETCH_HIGH_ENABLE (1UL << 16) /* bit 48 */ -#define IBS_FETCH_LOW_MAX_CNT_MASK 0x0000FFFFUL /* MaxCnt mask */ - -/*IbsOpCtl bits */ -#define IBS_OP_LOW_VALID_BIT (1ULL<<18) /* bit 18 */ -#define IBS_OP_LOW_ENABLE (1ULL<<17) /* bit 17 */ - -/* Codes used in cpu_buffer.c */ -/* This produces duplicate code, need to be fixed */ -#define IBS_FETCH_BEGIN 3 -#define IBS_OP_BEGIN 4 - -/* The function interface needs to be fixed, something like add - data. Should then be added to linux/oprofile.h. */ -extern void oprofile_add_ibs_sample(struct pt_regs *const regs, - unsigned int * const ibs_sample, u8 code); - -struct ibs_fetch_sample { - /* MSRC001_1031 IBS Fetch Linear Address Register */ - unsigned int ibs_fetch_lin_addr_low; - unsigned int ibs_fetch_lin_addr_high; - /* MSRC001_1030 IBS Fetch Control Register */ - unsigned int ibs_fetch_ctl_low; - unsigned int ibs_fetch_ctl_high; - /* MSRC001_1032 IBS Fetch Physical Address Register */ - unsigned int ibs_fetch_phys_addr_low; - unsigned int ibs_fetch_phys_addr_high; -}; - -struct ibs_op_sample { - /* MSRC001_1034 IBS Op Logical Address Register (IbsRIP) */ - unsigned int ibs_op_rip_low; - unsigned int ibs_op_rip_high; - /* MSRC001_1035 IBS Op Data Register */ - unsigned int ibs_op_data1_low; - unsigned int ibs_op_data1_high; - /* MSRC001_1036 IBS Op Data 2 Register */ - unsigned int ibs_op_data2_low; - unsigned int ibs_op_data2_high; - /* MSRC001_1037 IBS Op Data 3 Register */ - unsigned int ibs_op_data3_low; - unsigned int ibs_op_data3_high; - /* MSRC001_1038 IBS DC Linear Address Register (IbsDcLinAd) */ - unsigned int ibs_dc_linear_low; - unsigned int ibs_dc_linear_high; - /* MSRC001_1039 IBS DC Physical Address Register (IbsDcPhysAd) */ - unsigned int ibs_dc_phys_low; - unsigned int ibs_dc_phys_high; -}; - -/* - * unitialize the APIC for the IBS interrupts if needed on AMD Family10h+ -*/ -static void clear_ibs_nmi(void); - -static int ibs_allowed; /* AMD Family10h and later */ - -struct op_ibs_config { - unsigned long op_enabled; - unsigned long fetch_enabled; - unsigned long max_cnt_fetch; - unsigned long max_cnt_op; - unsigned long rand_en; - unsigned long dispatched_ops; -}; - -static struct op_ibs_config ibs_config; - -#endif - -/* functions for op_amd_spec */ - -static void op_amd_fill_in_addresses(struct op_msrs * const msrs) -{ - int i; - - for (i = 0; i < NUM_COUNTERS; i++) { - if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) - msrs->counters[i].addr = MSR_K7_PERFCTR0 + i; - else - msrs->counters[i].addr = 0; - } - - for (i = 0; i < NUM_CONTROLS; i++) { - if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) - msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i; - else - msrs->controls[i].addr = 0; - } -} - - -static void op_amd_setup_ctrs(struct op_msrs const * const msrs) -{ - unsigned int low, high; - int i; - - /* clear all counters */ - for (i = 0 ; i < NUM_CONTROLS; ++i) { - if (unlikely(!CTRL_IS_RESERVED(msrs, i))) - continue; - CTRL_READ(low, high, msrs, i); - CTRL_CLEAR_LO(low); - CTRL_CLEAR_HI(high); - CTRL_WRITE(low, high, msrs, i); - } - - /* avoid a false detection of ctr overflows in NMI handler */ - for (i = 0; i < NUM_COUNTERS; ++i) { - if (unlikely(!CTR_IS_RESERVED(msrs, i))) - continue; - CTR_WRITE(1, msrs, i); - } - - /* enable active counters */ - for (i = 0; i < NUM_COUNTERS; ++i) { - if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) { - reset_value[i] = counter_config[i].count; - - CTR_WRITE(counter_config[i].count, msrs, i); - - CTRL_READ(low, high, msrs, i); - CTRL_CLEAR_LO(low); - CTRL_CLEAR_HI(high); - CTRL_SET_ENABLE(low); - CTRL_SET_USR(low, counter_config[i].user); - CTRL_SET_KERN(low, counter_config[i].kernel); - CTRL_SET_UM(low, counter_config[i].unit_mask); - CTRL_SET_EVENT_LOW(low, counter_config[i].event); - CTRL_SET_EVENT_HIGH(high, counter_config[i].event); - CTRL_SET_HOST_ONLY(high, 0); - CTRL_SET_GUEST_ONLY(high, 0); - - CTRL_WRITE(low, high, msrs, i); - } else { - reset_value[i] = 0; - } - } -} - -#ifdef CONFIG_OPROFILE_IBS - -static inline int -op_amd_handle_ibs(struct pt_regs * const regs, - struct op_msrs const * const msrs) -{ - unsigned int low, high; - struct ibs_fetch_sample ibs_fetch; - struct ibs_op_sample ibs_op; - - if (!ibs_allowed) - return 1; - - if (ibs_config.fetch_enabled) { - rdmsr(MSR_AMD64_IBSFETCHCTL, low, high); - if (high & IBS_FETCH_HIGH_VALID_BIT) { - ibs_fetch.ibs_fetch_ctl_high = high; - ibs_fetch.ibs_fetch_ctl_low = low; - rdmsr(MSR_AMD64_IBSFETCHLINAD, low, high); - ibs_fetch.ibs_fetch_lin_addr_high = high; - ibs_fetch.ibs_fetch_lin_addr_low = low; - rdmsr(MSR_AMD64_IBSFETCHPHYSAD, low, high); - ibs_fetch.ibs_fetch_phys_addr_high = high; - ibs_fetch.ibs_fetch_phys_addr_low = low; - - oprofile_add_ibs_sample(regs, - (unsigned int *)&ibs_fetch, - IBS_FETCH_BEGIN); - - /*reenable the IRQ */ - rdmsr(MSR_AMD64_IBSFETCHCTL, low, high); - high &= ~IBS_FETCH_HIGH_VALID_BIT; - high |= IBS_FETCH_HIGH_ENABLE; - low &= IBS_FETCH_LOW_MAX_CNT_MASK; - wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); - } - } - - if (ibs_config.op_enabled) { - rdmsr(MSR_AMD64_IBSOPCTL, low, high); - if (low & IBS_OP_LOW_VALID_BIT) { - rdmsr(MSR_AMD64_IBSOPRIP, low, high); - ibs_op.ibs_op_rip_low = low; - ibs_op.ibs_op_rip_high = high; - rdmsr(MSR_AMD64_IBSOPDATA, low, high); - ibs_op.ibs_op_data1_low = low; - ibs_op.ibs_op_data1_high = high; - rdmsr(MSR_AMD64_IBSOPDATA2, low, high); - ibs_op.ibs_op_data2_low = low; - ibs_op.ibs_op_data2_high = high; - rdmsr(MSR_AMD64_IBSOPDATA3, low, high); - ibs_op.ibs_op_data3_low = low; - ibs_op.ibs_op_data3_high = high; - rdmsr(MSR_AMD64_IBSDCLINAD, low, high); - ibs_op.ibs_dc_linear_low = low; - ibs_op.ibs_dc_linear_high = high; - rdmsr(MSR_AMD64_IBSDCPHYSAD, low, high); - ibs_op.ibs_dc_phys_low = low; - ibs_op.ibs_dc_phys_high = high; - - /* reenable the IRQ */ - oprofile_add_ibs_sample(regs, - (unsigned int *)&ibs_op, - IBS_OP_BEGIN); - rdmsr(MSR_AMD64_IBSOPCTL, low, high); - high = 0; - low &= ~IBS_OP_LOW_VALID_BIT; - low |= IBS_OP_LOW_ENABLE; - wrmsr(MSR_AMD64_IBSOPCTL, low, high); - } - } - - return 1; -} - -#endif - -static int op_amd_check_ctrs(struct pt_regs * const regs, - struct op_msrs const * const msrs) -{ - unsigned int low, high; - int i; - - for (i = 0 ; i < NUM_COUNTERS; ++i) { - if (!reset_value[i]) - continue; - CTR_READ(low, high, msrs, i); - if (CTR_OVERFLOWED(low)) { - oprofile_add_sample(regs, i); - CTR_WRITE(reset_value[i], msrs, i); - } - } - -#ifdef CONFIG_OPROFILE_IBS - op_amd_handle_ibs(regs, msrs); -#endif - - /* See op_model_ppro.c */ - return 1; -} - -static void op_amd_start(struct op_msrs const * const msrs) -{ - unsigned int low, high; - int i; - for (i = 0 ; i < NUM_COUNTERS ; ++i) { - if (reset_value[i]) { - CTRL_READ(low, high, msrs, i); - CTRL_SET_ACTIVE(low); - CTRL_WRITE(low, high, msrs, i); - } - } - -#ifdef CONFIG_OPROFILE_IBS - if (ibs_allowed && ibs_config.fetch_enabled) { - low = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF; - high = IBS_FETCH_HIGH_ENABLE; - wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); - } - - if (ibs_allowed && ibs_config.op_enabled) { - low = ((ibs_config.max_cnt_op >> 4) & 0xFFFF) + IBS_OP_LOW_ENABLE; - high = 0; - wrmsr(MSR_AMD64_IBSOPCTL, low, high); - } -#endif -} - - -static void op_amd_stop(struct op_msrs const * const msrs) -{ - unsigned int low, high; - int i; - - /* Subtle: stop on all counters to avoid race with - * setting our pm callback */ - for (i = 0 ; i < NUM_COUNTERS ; ++i) { - if (!reset_value[i]) - continue; - CTRL_READ(low, high, msrs, i); - CTRL_SET_INACTIVE(low); - CTRL_WRITE(low, high, msrs, i); - } - -#ifdef CONFIG_OPROFILE_IBS - if (ibs_allowed && ibs_config.fetch_enabled) { - low = 0; /* clear max count and enable */ - high = 0; - wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); - } - - if (ibs_allowed && ibs_config.op_enabled) { - low = 0; /* clear max count and enable */ - high = 0; - wrmsr(MSR_AMD64_IBSOPCTL, low, high); - } -#endif -} - -static void op_amd_shutdown(struct op_msrs const * const msrs) -{ - int i; - - for (i = 0 ; i < NUM_COUNTERS ; ++i) { - if (CTR_IS_RESERVED(msrs, i)) - release_perfctr_nmi(MSR_K7_PERFCTR0 + i); - } - for (i = 0 ; i < NUM_CONTROLS ; ++i) { - if (CTRL_IS_RESERVED(msrs, i)) - release_evntsel_nmi(MSR_K7_EVNTSEL0 + i); - } -} - -#ifndef CONFIG_OPROFILE_IBS - -/* no IBS support */ - -static int op_amd_init(struct oprofile_operations *ops) -{ - return 0; -} - -static void op_amd_exit(void) {} - -#else - -static u8 ibs_eilvt_off; - -static inline void apic_init_ibs_nmi_per_cpu(void *arg) -{ - ibs_eilvt_off = setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_NMI, 0); -} - -static inline void apic_clear_ibs_nmi_per_cpu(void *arg) -{ - setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_FIX, 1); -} - -static int pfm_amd64_setup_eilvt(void) -{ -#define IBSCTL_LVTOFFSETVAL (1 << 8) -#define IBSCTL 0x1cc - struct pci_dev *cpu_cfg; - int nodes; - u32 value = 0; - - /* per CPU setup */ - on_each_cpu(apic_init_ibs_nmi_per_cpu, NULL, 1); - - nodes = 0; - cpu_cfg = NULL; - do { - cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD, - PCI_DEVICE_ID_AMD_10H_NB_MISC, - cpu_cfg); - if (!cpu_cfg) - break; - ++nodes; - pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off - | IBSCTL_LVTOFFSETVAL); - pci_read_config_dword(cpu_cfg, IBSCTL, &value); - if (value != (ibs_eilvt_off | IBSCTL_LVTOFFSETVAL)) { - printk(KERN_DEBUG "Failed to setup IBS LVT offset, " - "IBSCTL = 0x%08x", value); - return 1; - } - } while (1); - - if (!nodes) { - printk(KERN_DEBUG "No CPU node configured for IBS"); - return 1; - } - -#ifdef CONFIG_NUMA - /* Sanity check */ - /* Works only for 64bit with proper numa implementation. */ - if (nodes != num_possible_nodes()) { - printk(KERN_DEBUG "Failed to setup CPU node(s) for IBS, " - "found: %d, expected %d", - nodes, num_possible_nodes()); - return 1; - } -#endif - return 0; -} - -/* - * initialize the APIC for the IBS interrupts - * if available (AMD Family10h rev B0 and later) - */ -static void setup_ibs(void) -{ - ibs_allowed = boot_cpu_has(X86_FEATURE_IBS); - - if (!ibs_allowed) - return; - - if (pfm_amd64_setup_eilvt()) { - ibs_allowed = 0; - return; - } - - printk(KERN_INFO "oprofile: AMD IBS detected\n"); -} - - -/* - * unitialize the APIC for the IBS interrupts if needed on AMD Family10h - * rev B0 and later */ -static void clear_ibs_nmi(void) -{ - if (ibs_allowed) - on_each_cpu(apic_clear_ibs_nmi_per_cpu, NULL, 1); -} - -static int (*create_arch_files)(struct super_block * sb, struct dentry * root); - -static int setup_ibs_files(struct super_block * sb, struct dentry * root) -{ - char buf[12]; - struct dentry *dir; - int ret = 0; - - /* architecture specific files */ - if (create_arch_files) - ret = create_arch_files(sb, root); - - if (ret) - return ret; - - if (!ibs_allowed) - return ret; - - /* model specific files */ - - /* setup some reasonable defaults */ - ibs_config.max_cnt_fetch = 250000; - ibs_config.fetch_enabled = 0; - ibs_config.max_cnt_op = 250000; - ibs_config.op_enabled = 0; - ibs_config.dispatched_ops = 1; - snprintf(buf, sizeof(buf), "ibs_fetch"); - dir = oprofilefs_mkdir(sb, root, buf); - oprofilefs_create_ulong(sb, dir, "rand_enable", - &ibs_config.rand_en); - oprofilefs_create_ulong(sb, dir, "enable", - &ibs_config.fetch_enabled); - oprofilefs_create_ulong(sb, dir, "max_count", - &ibs_config.max_cnt_fetch); - snprintf(buf, sizeof(buf), "ibs_uops"); - dir = oprofilefs_mkdir(sb, root, buf); - oprofilefs_create_ulong(sb, dir, "enable", - &ibs_config.op_enabled); - oprofilefs_create_ulong(sb, dir, "max_count", - &ibs_config.max_cnt_op); - oprofilefs_create_ulong(sb, dir, "dispatched_ops", - &ibs_config.dispatched_ops); - - return 0; -} - -static int op_amd_init(struct oprofile_operations *ops) -{ - setup_ibs(); - create_arch_files = ops->create_files; - ops->create_files = setup_ibs_files; - return 0; -} - -static void op_amd_exit(void) -{ - clear_ibs_nmi(); -} - -#endif - -struct op_x86_model_spec const op_amd_spec = { - .init = op_amd_init, - .exit = op_amd_exit, - .num_counters = NUM_COUNTERS, - .num_controls = NUM_CONTROLS, - .fill_in_addresses = &op_amd_fill_in_addresses, - .setup_ctrs = &op_amd_setup_ctrs, - .check_ctrs = &op_amd_check_ctrs, - .start = &op_amd_start, - .stop = &op_amd_stop, - .shutdown = &op_amd_shutdown -}; -- cgit v0.10.2 From 1ddb5518052e4e28ab489237443f7443b3fd69ca Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 25 Jul 2008 16:48:55 +0200 Subject: x86: convert pci-dma.c from round_up to roundup Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index cbecb05..88ddd04 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -82,7 +82,7 @@ void __init dma32_reserve_bootmem(void) * using 512M as goal */ align = 64ULL<<20; - size = round_up(dma32_bootmem_size, align); + size = roundup(dma32_bootmem_size, align); dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align, 512ULL<<20); if (dma32_bootmem_ptr) -- cgit v0.10.2 From 15ae2d76ceb037a1e3fcd8fc9b4fe3177f9f3831 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 25 Jul 2008 16:48:56 +0200 Subject: x86: convert pageattr.c from round_up to roundup Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 65c6e46..0d254ad 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -78,7 +78,7 @@ static inline unsigned long highmap_start_pfn(void) static inline unsigned long highmap_end_pfn(void) { - return __pa(round_up((unsigned long)_end, PMD_SIZE)) >> PAGE_SHIFT; + return __pa(roundup((unsigned long)_end, PMD_SIZE)) >> PAGE_SHIFT; } #endif -- cgit v0.10.2 From d86bb0dac792c6a9c92944b6db2687980c808094 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 25 Jul 2008 16:48:57 +0200 Subject: x86: convert init_64.c from round_up to roundup Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index ec37121..e480577 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -258,7 +258,7 @@ void __init init_extra_mapping_uc(unsigned long phys, unsigned long size) void __init cleanup_highmap(void) { unsigned long vaddr = __START_KERNEL_map; - unsigned long end = round_up((unsigned long)_end, PMD_SIZE) - 1; + unsigned long end = roundup((unsigned long)_end, PMD_SIZE) - 1; pmd_t *pmd = level2_kernel_pgt; pmd_t *last_pmd = pmd + PTRS_PER_PMD; @@ -474,14 +474,14 @@ static void __init find_early_table_space(unsigned long end) unsigned long puds, pmds, ptes, tables, start; puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; - tables = round_up(puds * sizeof(pud_t), PAGE_SIZE); + tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); if (direct_gbpages) { unsigned long extra; extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT); pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT; } else pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; - tables += round_up(pmds * sizeof(pmd_t), PAGE_SIZE); + tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); if (cpu_has_pse) { unsigned long extra; @@ -489,7 +489,7 @@ static void __init find_early_table_space(unsigned long end) ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; } else ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; - tables += round_up(ptes * sizeof(pte_t), PAGE_SIZE); + tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE); /* * RED-PEN putting page tables only on node 0 could -- cgit v0.10.2 From be3e89ee6df8607356f705901dd90bcf3836c86e Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 25 Jul 2008 16:48:58 +0200 Subject: x86: convert numa_64.c from round_up to roundup Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index a4dd793..cebcbf1 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -79,7 +79,7 @@ static int __init allocate_cachealigned_memnodemap(void) return 0; addr = 0x8000; - nodemap_size = round_up(sizeof(s16) * memnodemapsize, L1_CACHE_BYTES); + nodemap_size = roundup(sizeof(s16) * memnodemapsize, L1_CACHE_BYTES); nodemap_addr = find_e820_area(addr, max_pfn< Date: Fri, 25 Jul 2008 16:48:59 +0200 Subject: x86: convert discontig_32.c from round_up to roundup Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c index 62fa440..847c164 100644 --- a/arch/x86/mm/discontig_32.c +++ b/arch/x86/mm/discontig_32.c @@ -328,7 +328,7 @@ void __init initmem_init(unsigned long start_pfn, get_memcfg_numa(); - kva_pages = round_up(calculate_numa_remap_pages(), PTRS_PER_PTE); + kva_pages = roundup(calculate_numa_remap_pages(), PTRS_PER_PTE); kva_target_pfn = round_down(max_low_pfn - kva_pages, PTRS_PER_PTE); do { -- cgit v0.10.2 From 0af36739af81f152cc24a0fdfa0754ef657afe3d Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 24 Jul 2008 17:27:57 -0700 Subject: usb: move ehci reg def prepare x86: usb debug port early console move ehci struct def to linux/usrb/ehci_def.h from host/ehci.h Signed-off-by: Yinghai Lu Acked-by: David Brownell Cc: Andrew Morton Cc: Andi Kleen Cc: "Arjan van de Ven" Cc: "Eric W. Biederman" Cc: "Greg KH" Signed-off-by: Ingo Molnar diff --git a/drivers/usb/host/ehci.h b/drivers/usb/host/ehci.h index 5799298..b697a13 100644 --- a/drivers/usb/host/ehci.h +++ b/drivers/usb/host/ehci.h @@ -210,143 +210,7 @@ timer_action (struct ehci_hcd *ehci, enum ehci_timer_action action) /*-------------------------------------------------------------------------*/ -/* EHCI register interface, corresponds to EHCI Revision 0.95 specification */ - -/* Section 2.2 Host Controller Capability Registers */ -struct ehci_caps { - /* these fields are specified as 8 and 16 bit registers, - * but some hosts can't perform 8 or 16 bit PCI accesses. - */ - u32 hc_capbase; -#define HC_LENGTH(p) (((p)>>00)&0x00ff) /* bits 7:0 */ -#define HC_VERSION(p) (((p)>>16)&0xffff) /* bits 31:16 */ - u32 hcs_params; /* HCSPARAMS - offset 0x4 */ -#define HCS_DEBUG_PORT(p) (((p)>>20)&0xf) /* bits 23:20, debug port? */ -#define HCS_INDICATOR(p) ((p)&(1 << 16)) /* true: has port indicators */ -#define HCS_N_CC(p) (((p)>>12)&0xf) /* bits 15:12, #companion HCs */ -#define HCS_N_PCC(p) (((p)>>8)&0xf) /* bits 11:8, ports per CC */ -#define HCS_PORTROUTED(p) ((p)&(1 << 7)) /* true: port routing */ -#define HCS_PPC(p) ((p)&(1 << 4)) /* true: port power control */ -#define HCS_N_PORTS(p) (((p)>>0)&0xf) /* bits 3:0, ports on HC */ - - u32 hcc_params; /* HCCPARAMS - offset 0x8 */ -#define HCC_EXT_CAPS(p) (((p)>>8)&0xff) /* for pci extended caps */ -#define HCC_ISOC_CACHE(p) ((p)&(1 << 7)) /* true: can cache isoc frame */ -#define HCC_ISOC_THRES(p) (((p)>>4)&0x7) /* bits 6:4, uframes cached */ -#define HCC_CANPARK(p) ((p)&(1 << 2)) /* true: can park on async qh */ -#define HCC_PGM_FRAMELISTLEN(p) ((p)&(1 << 1)) /* true: periodic_size changes*/ -#define HCC_64BIT_ADDR(p) ((p)&(1)) /* true: can use 64-bit addr */ - u8 portroute [8]; /* nibbles for routing - offset 0xC */ -} __attribute__ ((packed)); - - -/* Section 2.3 Host Controller Operational Registers */ -struct ehci_regs { - - /* USBCMD: offset 0x00 */ - u32 command; -/* 23:16 is r/w intr rate, in microframes; default "8" == 1/msec */ -#define CMD_PARK (1<<11) /* enable "park" on async qh */ -#define CMD_PARK_CNT(c) (((c)>>8)&3) /* how many transfers to park for */ -#define CMD_LRESET (1<<7) /* partial reset (no ports, etc) */ -#define CMD_IAAD (1<<6) /* "doorbell" interrupt async advance */ -#define CMD_ASE (1<<5) /* async schedule enable */ -#define CMD_PSE (1<<4) /* periodic schedule enable */ -/* 3:2 is periodic frame list size */ -#define CMD_RESET (1<<1) /* reset HC not bus */ -#define CMD_RUN (1<<0) /* start/stop HC */ - - /* USBSTS: offset 0x04 */ - u32 status; -#define STS_ASS (1<<15) /* Async Schedule Status */ -#define STS_PSS (1<<14) /* Periodic Schedule Status */ -#define STS_RECL (1<<13) /* Reclamation */ -#define STS_HALT (1<<12) /* Not running (any reason) */ -/* some bits reserved */ - /* these STS_* flags are also intr_enable bits (USBINTR) */ -#define STS_IAA (1<<5) /* Interrupted on async advance */ -#define STS_FATAL (1<<4) /* such as some PCI access errors */ -#define STS_FLR (1<<3) /* frame list rolled over */ -#define STS_PCD (1<<2) /* port change detect */ -#define STS_ERR (1<<1) /* "error" completion (overflow, ...) */ -#define STS_INT (1<<0) /* "normal" completion (short, ...) */ - - /* USBINTR: offset 0x08 */ - u32 intr_enable; - - /* FRINDEX: offset 0x0C */ - u32 frame_index; /* current microframe number */ - /* CTRLDSSEGMENT: offset 0x10 */ - u32 segment; /* address bits 63:32 if needed */ - /* PERIODICLISTBASE: offset 0x14 */ - u32 frame_list; /* points to periodic list */ - /* ASYNCLISTADDR: offset 0x18 */ - u32 async_next; /* address of next async queue head */ - - u32 reserved [9]; - - /* CONFIGFLAG: offset 0x40 */ - u32 configured_flag; -#define FLAG_CF (1<<0) /* true: we'll support "high speed" */ - - /* PORTSC: offset 0x44 */ - u32 port_status [0]; /* up to N_PORTS */ -/* 31:23 reserved */ -#define PORT_WKOC_E (1<<22) /* wake on overcurrent (enable) */ -#define PORT_WKDISC_E (1<<21) /* wake on disconnect (enable) */ -#define PORT_WKCONN_E (1<<20) /* wake on connect (enable) */ -/* 19:16 for port testing */ -#define PORT_LED_OFF (0<<14) -#define PORT_LED_AMBER (1<<14) -#define PORT_LED_GREEN (2<<14) -#define PORT_LED_MASK (3<<14) -#define PORT_OWNER (1<<13) /* true: companion hc owns this port */ -#define PORT_POWER (1<<12) /* true: has power (see PPC) */ -#define PORT_USB11(x) (((x)&(3<<10))==(1<<10)) /* USB 1.1 device */ -/* 11:10 for detecting lowspeed devices (reset vs release ownership) */ -/* 9 reserved */ -#define PORT_RESET (1<<8) /* reset port */ -#define PORT_SUSPEND (1<<7) /* suspend port */ -#define PORT_RESUME (1<<6) /* resume it */ -#define PORT_OCC (1<<5) /* over current change */ -#define PORT_OC (1<<4) /* over current active */ -#define PORT_PEC (1<<3) /* port enable change */ -#define PORT_PE (1<<2) /* port enable */ -#define PORT_CSC (1<<1) /* connect status change */ -#define PORT_CONNECT (1<<0) /* device connected */ -#define PORT_RWC_BITS (PORT_CSC | PORT_PEC | PORT_OCC) -} __attribute__ ((packed)); - -#define USBMODE 0x68 /* USB Device mode */ -#define USBMODE_SDIS (1<<3) /* Stream disable */ -#define USBMODE_BE (1<<2) /* BE/LE endianness select */ -#define USBMODE_CM_HC (3<<0) /* host controller mode */ -#define USBMODE_CM_IDLE (0<<0) /* idle state */ - -/* Appendix C, Debug port ... intended for use with special "debug devices" - * that can help if there's no serial console. (nonstandard enumeration.) - */ -struct ehci_dbg_port { - u32 control; -#define DBGP_OWNER (1<<30) -#define DBGP_ENABLED (1<<28) -#define DBGP_DONE (1<<16) -#define DBGP_INUSE (1<<10) -#define DBGP_ERRCODE(x) (((x)>>7)&0x07) -# define DBGP_ERR_BAD 1 -# define DBGP_ERR_SIGNAL 2 -#define DBGP_ERROR (1<<6) -#define DBGP_GO (1<<5) -#define DBGP_OUT (1<<4) -#define DBGP_LEN(x) (((x)>>0)&0x0f) - u32 pids; -#define DBGP_PID_GET(x) (((x)>>16)&0xff) -#define DBGP_PID_SET(data,tok) (((data)<<8)|(tok)) - u32 data03; - u32 data47; - u32 address; -#define DBGP_EPADDR(dev,ep) (((dev)<<8)|(ep)) -} __attribute__ ((packed)); +#include /*-------------------------------------------------------------------------*/ diff --git a/include/linux/usb/ehci_def.h b/include/linux/usb/ehci_def.h new file mode 100644 index 0000000..5b88e36 --- /dev/null +++ b/include/linux/usb/ehci_def.h @@ -0,0 +1,160 @@ +/* + * Copyright (c) 2001-2002 by David Brownell + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef __LINUX_USB_EHCI_DEF_H +#define __LINUX_USB_EHCI_DEF_H + +/* EHCI register interface, corresponds to EHCI Revision 0.95 specification */ + +/* Section 2.2 Host Controller Capability Registers */ +struct ehci_caps { + /* these fields are specified as 8 and 16 bit registers, + * but some hosts can't perform 8 or 16 bit PCI accesses. + */ + u32 hc_capbase; +#define HC_LENGTH(p) (((p)>>00)&0x00ff) /* bits 7:0 */ +#define HC_VERSION(p) (((p)>>16)&0xffff) /* bits 31:16 */ + u32 hcs_params; /* HCSPARAMS - offset 0x4 */ +#define HCS_DEBUG_PORT(p) (((p)>>20)&0xf) /* bits 23:20, debug port? */ +#define HCS_INDICATOR(p) ((p)&(1 << 16)) /* true: has port indicators */ +#define HCS_N_CC(p) (((p)>>12)&0xf) /* bits 15:12, #companion HCs */ +#define HCS_N_PCC(p) (((p)>>8)&0xf) /* bits 11:8, ports per CC */ +#define HCS_PORTROUTED(p) ((p)&(1 << 7)) /* true: port routing */ +#define HCS_PPC(p) ((p)&(1 << 4)) /* true: port power control */ +#define HCS_N_PORTS(p) (((p)>>0)&0xf) /* bits 3:0, ports on HC */ + + u32 hcc_params; /* HCCPARAMS - offset 0x8 */ +#define HCC_EXT_CAPS(p) (((p)>>8)&0xff) /* for pci extended caps */ +#define HCC_ISOC_CACHE(p) ((p)&(1 << 7)) /* true: can cache isoc frame */ +#define HCC_ISOC_THRES(p) (((p)>>4)&0x7) /* bits 6:4, uframes cached */ +#define HCC_CANPARK(p) ((p)&(1 << 2)) /* true: can park on async qh */ +#define HCC_PGM_FRAMELISTLEN(p) ((p)&(1 << 1)) /* true: periodic_size changes*/ +#define HCC_64BIT_ADDR(p) ((p)&(1)) /* true: can use 64-bit addr */ + u8 portroute [8]; /* nibbles for routing - offset 0xC */ +} __attribute__ ((packed)); + + +/* Section 2.3 Host Controller Operational Registers */ +struct ehci_regs { + + /* USBCMD: offset 0x00 */ + u32 command; +/* 23:16 is r/w intr rate, in microframes; default "8" == 1/msec */ +#define CMD_PARK (1<<11) /* enable "park" on async qh */ +#define CMD_PARK_CNT(c) (((c)>>8)&3) /* how many transfers to park for */ +#define CMD_LRESET (1<<7) /* partial reset (no ports, etc) */ +#define CMD_IAAD (1<<6) /* "doorbell" interrupt async advance */ +#define CMD_ASE (1<<5) /* async schedule enable */ +#define CMD_PSE (1<<4) /* periodic schedule enable */ +/* 3:2 is periodic frame list size */ +#define CMD_RESET (1<<1) /* reset HC not bus */ +#define CMD_RUN (1<<0) /* start/stop HC */ + + /* USBSTS: offset 0x04 */ + u32 status; +#define STS_ASS (1<<15) /* Async Schedule Status */ +#define STS_PSS (1<<14) /* Periodic Schedule Status */ +#define STS_RECL (1<<13) /* Reclamation */ +#define STS_HALT (1<<12) /* Not running (any reason) */ +/* some bits reserved */ + /* these STS_* flags are also intr_enable bits (USBINTR) */ +#define STS_IAA (1<<5) /* Interrupted on async advance */ +#define STS_FATAL (1<<4) /* such as some PCI access errors */ +#define STS_FLR (1<<3) /* frame list rolled over */ +#define STS_PCD (1<<2) /* port change detect */ +#define STS_ERR (1<<1) /* "error" completion (overflow, ...) */ +#define STS_INT (1<<0) /* "normal" completion (short, ...) */ + + /* USBINTR: offset 0x08 */ + u32 intr_enable; + + /* FRINDEX: offset 0x0C */ + u32 frame_index; /* current microframe number */ + /* CTRLDSSEGMENT: offset 0x10 */ + u32 segment; /* address bits 63:32 if needed */ + /* PERIODICLISTBASE: offset 0x14 */ + u32 frame_list; /* points to periodic list */ + /* ASYNCLISTADDR: offset 0x18 */ + u32 async_next; /* address of next async queue head */ + + u32 reserved [9]; + + /* CONFIGFLAG: offset 0x40 */ + u32 configured_flag; +#define FLAG_CF (1<<0) /* true: we'll support "high speed" */ + + /* PORTSC: offset 0x44 */ + u32 port_status [0]; /* up to N_PORTS */ +/* 31:23 reserved */ +#define PORT_WKOC_E (1<<22) /* wake on overcurrent (enable) */ +#define PORT_WKDISC_E (1<<21) /* wake on disconnect (enable) */ +#define PORT_WKCONN_E (1<<20) /* wake on connect (enable) */ +/* 19:16 for port testing */ +#define PORT_LED_OFF (0<<14) +#define PORT_LED_AMBER (1<<14) +#define PORT_LED_GREEN (2<<14) +#define PORT_LED_MASK (3<<14) +#define PORT_OWNER (1<<13) /* true: companion hc owns this port */ +#define PORT_POWER (1<<12) /* true: has power (see PPC) */ +#define PORT_USB11(x) (((x)&(3<<10)) == (1<<10)) /* USB 1.1 device */ +/* 11:10 for detecting lowspeed devices (reset vs release ownership) */ +/* 9 reserved */ +#define PORT_RESET (1<<8) /* reset port */ +#define PORT_SUSPEND (1<<7) /* suspend port */ +#define PORT_RESUME (1<<6) /* resume it */ +#define PORT_OCC (1<<5) /* over current change */ +#define PORT_OC (1<<4) /* over current active */ +#define PORT_PEC (1<<3) /* port enable change */ +#define PORT_PE (1<<2) /* port enable */ +#define PORT_CSC (1<<1) /* connect status change */ +#define PORT_CONNECT (1<<0) /* device connected */ +#define PORT_RWC_BITS (PORT_CSC | PORT_PEC | PORT_OCC) +} __attribute__ ((packed)); + +#define USBMODE 0x68 /* USB Device mode */ +#define USBMODE_SDIS (1<<3) /* Stream disable */ +#define USBMODE_BE (1<<2) /* BE/LE endianness select */ +#define USBMODE_CM_HC (3<<0) /* host controller mode */ +#define USBMODE_CM_IDLE (0<<0) /* idle state */ + +/* Appendix C, Debug port ... intended for use with special "debug devices" + * that can help if there's no serial console. (nonstandard enumeration.) + */ +struct ehci_dbg_port { + u32 control; +#define DBGP_OWNER (1<<30) +#define DBGP_ENABLED (1<<28) +#define DBGP_DONE (1<<16) +#define DBGP_INUSE (1<<10) +#define DBGP_ERRCODE(x) (((x)>>7)&0x07) +# define DBGP_ERR_BAD 1 +# define DBGP_ERR_SIGNAL 2 +#define DBGP_ERROR (1<<6) +#define DBGP_GO (1<<5) +#define DBGP_OUT (1<<4) +#define DBGP_LEN(x) (((x)>>0)&0x0f) + u32 pids; +#define DBGP_PID_GET(x) (((x)>>16)&0xff) +#define DBGP_PID_SET(data, tok) (((data)<<8)|(tok)) + u32 data03; + u32 data47; + u32 address; +#define DBGP_EPADDR(dev, ep) (((dev)<<8)|(ep)) +} __attribute__ ((packed)); + +#endif /* __LINUX_USB_EHCI_DEF_H */ -- cgit v0.10.2 From 5c05917e7fe313a187ad6ebb94c1c6cf42862a0b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 24 Jul 2008 17:29:40 -0700 Subject: x86: usb debug port early console, v4 based on work from Eric, and add some timeout so don't dead loop when debug device is not installed v2: fix checkpatch warning v3: move ehci struct def to linux/usrb/ehci_def.h from host/ehci.h also add CONFIG_EARLY_PRINTK_DBGP to disable it by default v4: address comments from Ingo, seperate ehci reg def moving to another patch also add auto detect port that connect to debug device for Nvidia southbridge Signed-off-by: Yinghai Lu Cc: Andrew Morton Cc: Andi Kleen Cc: "Arjan van de Ven" Cc: "Eric W. Biederman" Cc: "Greg KH" Signed-off-by: Ingo Molnar diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index e7bea3e..92ddd4a 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -657,11 +657,12 @@ and is between 256 and 4096 characters. It is defined in the file earlyprintk= [X86-32,X86-64,SH,BLACKFIN] earlyprintk=vga earlyprintk=serial[,ttySn[,baudrate]] + earlyprintk=dbgp Append ",keep" to not disable it when the real console takes over. - Only vga or serial at a time, not both. + Only vga or serial or usb debug port at a time. Currently only ttyS0 and ttyS1 are supported. diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 092f019..93422d2 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -43,6 +43,19 @@ config EARLY_PRINTK with klogd/syslogd or the X server. You should normally N here, unless you want to debug such a crash. +config EARLY_PRINTK_DBGP + bool "Early printk via EHCI debug port" + default n + depends on EARLY_PRINTK + help + Write kernel log output directly into the EHCI debug port. + + This is useful for kernel debugging when your machine crashes very + early before the console code is initialized. For normal operation + it is not recommended because it looks ugly and doesn't cooperate + with klogd/syslogd or the X server. You should normally N here, + unless you want to debug such a crash. You need usb debug device. + config DEBUG_STACKOVERFLOW bool "Check for stack overflows" depends on DEBUG_KERNEL diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index ff9e735..28155ac 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -3,11 +3,19 @@ #include #include #include +#include +#include +#include +#include #include #include #include #include #include +#include +#include +#include +#include /* Simple VGA output */ #define VGABASE (__ISA_IO_base + 0xb8000) @@ -78,6 +86,7 @@ static int early_serial_base = 0x3f8; /* ttyS0 */ static int early_serial_putc(unsigned char ch) { unsigned timeout = 0xffff; + while ((inb(early_serial_base + LSR) & XMTRDY) == 0 && --timeout) cpu_relax(); outb(ch, early_serial_base + TXR); @@ -151,6 +160,721 @@ static struct console early_serial_console = { .index = -1, }; +#ifdef CONFIG_EARLY_PRINTK_DBGP + +static struct ehci_caps __iomem *ehci_caps; +static struct ehci_regs __iomem *ehci_regs; +static struct ehci_dbg_port __iomem *ehci_debug; +static unsigned int dbgp_endpoint_out; + +struct ehci_dev { + u32 bus; + u32 slot; + u32 func; +}; + +static struct ehci_dev ehci_dev; + +#define USB_DEBUG_DEVNUM 127 + +#define DBGP_DATA_TOGGLE 0x8800 + +static inline u32 dbgp_pid_update(u32 x, u32 tok) +{ + return ((x ^ DBGP_DATA_TOGGLE) & 0xffff00) | (tok & 0xff); +} + +static inline u32 dbgp_len_update(u32 x, u32 len) +{ + return (x & ~0x0f) | (len & 0x0f); +} + +/* + * USB Packet IDs (PIDs) + */ + +/* token */ +#define USB_PID_OUT 0xe1 +#define USB_PID_IN 0x69 +#define USB_PID_SOF 0xa5 +#define USB_PID_SETUP 0x2d +/* handshake */ +#define USB_PID_ACK 0xd2 +#define USB_PID_NAK 0x5a +#define USB_PID_STALL 0x1e +#define USB_PID_NYET 0x96 +/* data */ +#define USB_PID_DATA0 0xc3 +#define USB_PID_DATA1 0x4b +#define USB_PID_DATA2 0x87 +#define USB_PID_MDATA 0x0f +/* Special */ +#define USB_PID_PREAMBLE 0x3c +#define USB_PID_ERR 0x3c +#define USB_PID_SPLIT 0x78 +#define USB_PID_PING 0xb4 +#define USB_PID_UNDEF_0 0xf0 + +#define USB_PID_DATA_TOGGLE 0x88 +#define DBGP_CLAIM (DBGP_OWNER | DBGP_ENABLED | DBGP_INUSE) + +#define PCI_CAP_ID_EHCI_DEBUG 0xa + +#define HUB_ROOT_RESET_TIME 50 /* times are in msec */ +#define HUB_SHORT_RESET_TIME 10 +#define HUB_LONG_RESET_TIME 200 +#define HUB_RESET_TIMEOUT 500 + +#define DBGP_MAX_PACKET 8 + +static int dbgp_wait_until_complete(void) +{ + u32 ctrl; + int loop = 0x100000; + + do { + ctrl = readl(&ehci_debug->control); + /* Stop when the transaction is finished */ + if (ctrl & DBGP_DONE) + break; + } while (--loop > 0); + + if (!loop) + return -1; + + /* + * Now that we have observed the completed transaction, + * clear the done bit. + */ + writel(ctrl | DBGP_DONE, &ehci_debug->control); + return (ctrl & DBGP_ERROR) ? -DBGP_ERRCODE(ctrl) : DBGP_LEN(ctrl); +} + +static void dbgp_mdelay(int ms) +{ + int i; + + while (ms--) { + for (i = 0; i < 1000; i++) + outb(0x1, 0x80); + } +} + +static void dbgp_breath(void) +{ + /* Sleep to give the debug port a chance to breathe */ +} + +static int dbgp_wait_until_done(unsigned ctrl) +{ + u32 pids, lpid; + int ret; + int loop = 3; + +retry: + writel(ctrl | DBGP_GO, &ehci_debug->control); + ret = dbgp_wait_until_complete(); + pids = readl(&ehci_debug->pids); + lpid = DBGP_PID_GET(pids); + + if (ret < 0) + return ret; + + /* + * If the port is getting full or it has dropped data + * start pacing ourselves, not necessary but it's friendly. + */ + if ((lpid == USB_PID_NAK) || (lpid == USB_PID_NYET)) + dbgp_breath(); + + /* If I get a NACK reissue the transmission */ + if (lpid == USB_PID_NAK) { + if (--loop > 0) + goto retry; + } + + return ret; +} + +static void dbgp_set_data(const void *buf, int size) +{ + const unsigned char *bytes = buf; + u32 lo, hi; + int i; + + lo = hi = 0; + for (i = 0; i < 4 && i < size; i++) + lo |= bytes[i] << (8*i); + for (; i < 8 && i < size; i++) + hi |= bytes[i] << (8*(i - 4)); + writel(lo, &ehci_debug->data03); + writel(hi, &ehci_debug->data47); +} + +static void dbgp_get_data(void *buf, int size) +{ + unsigned char *bytes = buf; + u32 lo, hi; + int i; + + lo = readl(&ehci_debug->data03); + hi = readl(&ehci_debug->data47); + for (i = 0; i < 4 && i < size; i++) + bytes[i] = (lo >> (8*i)) & 0xff; + for (; i < 8 && i < size; i++) + bytes[i] = (hi >> (8*(i - 4))) & 0xff; +} + +static int dbgp_bulk_write(unsigned devnum, unsigned endpoint, + const char *bytes, int size) +{ + u32 pids, addr, ctrl; + int ret; + + if (size > DBGP_MAX_PACKET) + return -1; + + addr = DBGP_EPADDR(devnum, endpoint); + + pids = readl(&ehci_debug->pids); + pids = dbgp_pid_update(pids, USB_PID_OUT); + + ctrl = readl(&ehci_debug->control); + ctrl = dbgp_len_update(ctrl, size); + ctrl |= DBGP_OUT; + ctrl |= DBGP_GO; + + dbgp_set_data(bytes, size); + writel(addr, &ehci_debug->address); + writel(pids, &ehci_debug->pids); + + ret = dbgp_wait_until_done(ctrl); + if (ret < 0) + return ret; + + return ret; +} + +static int dbgp_bulk_read(unsigned devnum, unsigned endpoint, void *data, + int size) +{ + u32 pids, addr, ctrl; + int ret; + + if (size > DBGP_MAX_PACKET) + return -1; + + addr = DBGP_EPADDR(devnum, endpoint); + + pids = readl(&ehci_debug->pids); + pids = dbgp_pid_update(pids, USB_PID_IN); + + ctrl = readl(&ehci_debug->control); + ctrl = dbgp_len_update(ctrl, size); + ctrl &= ~DBGP_OUT; + ctrl |= DBGP_GO; + + writel(addr, &ehci_debug->address); + writel(pids, &ehci_debug->pids); + ret = dbgp_wait_until_done(ctrl); + if (ret < 0) + return ret; + + if (size > ret) + size = ret; + dbgp_get_data(data, size); + return ret; +} + +static int dbgp_control_msg(unsigned devnum, int requesttype, int request, + int value, int index, void *data, int size) +{ + u32 pids, addr, ctrl; + struct usb_ctrlrequest req; + int read; + int ret; + + read = (requesttype & USB_DIR_IN) != 0; + if (size > (read ? DBGP_MAX_PACKET:0)) + return -1; + + /* Compute the control message */ + req.bRequestType = requesttype; + req.bRequest = request; + req.wValue = value; + req.wIndex = index; + req.wLength = size; + + pids = DBGP_PID_SET(USB_PID_DATA0, USB_PID_SETUP); + addr = DBGP_EPADDR(devnum, 0); + + ctrl = readl(&ehci_debug->control); + ctrl = dbgp_len_update(ctrl, sizeof(req)); + ctrl |= DBGP_OUT; + ctrl |= DBGP_GO; + + /* Send the setup message */ + dbgp_set_data(&req, sizeof(req)); + writel(addr, &ehci_debug->address); + writel(pids, &ehci_debug->pids); + ret = dbgp_wait_until_done(ctrl); + if (ret < 0) + return ret; + + /* Read the result */ + return dbgp_bulk_read(devnum, 0, data, size); +} + + +/* Find a PCI capability */ +static u32 __init find_cap(u32 num, u32 slot, u32 func, int cap) +{ + u8 pos; + int bytes; + + if (!(read_pci_config_16(num, slot, func, PCI_STATUS) & + PCI_STATUS_CAP_LIST)) + return 0; + + pos = read_pci_config_byte(num, slot, func, PCI_CAPABILITY_LIST); + for (bytes = 0; bytes < 48 && pos >= 0x40; bytes++) { + u8 id; + + pos &= ~3; + id = read_pci_config_byte(num, slot, func, pos+PCI_CAP_LIST_ID); + if (id == 0xff) + break; + if (id == cap) + return pos; + + pos = read_pci_config_byte(num, slot, func, + pos+PCI_CAP_LIST_NEXT); + } + return 0; +} + +static u32 __init __find_dbgp(u32 bus, u32 slot, u32 func) +{ + u32 class; + + class = read_pci_config(bus, slot, func, PCI_CLASS_REVISION); + if ((class >> 8) != PCI_CLASS_SERIAL_USB_EHCI) + return 0; + + return find_cap(bus, slot, func, PCI_CAP_ID_EHCI_DEBUG); +} + +static u32 __init find_dbgp(int ehci_num, u32 *rbus, u32 *rslot, u32 *rfunc) +{ + u32 bus, slot, func; + + for (bus = 0; bus < 256; bus++) { + for (slot = 0; slot < 32; slot++) { + for (func = 0; func < 8; func++) { + unsigned cap; + + cap = __find_dbgp(bus, slot, func); + + if (!cap) + continue; + if (ehci_num-- != 0) + continue; + *rbus = bus; + *rslot = slot; + *rfunc = func; + return cap; + } + } + } + return 0; +} + +static int ehci_reset_port(int port) +{ + u32 portsc; + u32 delay_time, delay; + int loop; + + /* Reset the usb debug port */ + portsc = readl(&ehci_regs->port_status[port - 1]); + portsc &= ~PORT_PE; + portsc |= PORT_RESET; + writel(portsc, &ehci_regs->port_status[port - 1]); + + delay = HUB_ROOT_RESET_TIME; + for (delay_time = 0; delay_time < HUB_RESET_TIMEOUT; + delay_time += delay) { + dbgp_mdelay(delay); + + portsc = readl(&ehci_regs->port_status[port - 1]); + if (portsc & PORT_RESET) { + /* force reset to complete */ + loop = 2; + writel(portsc & ~(PORT_RWC_BITS | PORT_RESET), + &ehci_regs->port_status[port - 1]); + do { + portsc = readl(&ehci_regs->port_status[port-1]); + } while ((portsc & PORT_RESET) && (--loop > 0)); + } + + /* Device went away? */ + if (!(portsc & PORT_CONNECT)) + return -ENOTCONN; + + /* bomb out completely if something weird happend */ + if ((portsc & PORT_CSC)) + return -EINVAL; + + /* If we've finished resetting, then break out of the loop */ + if (!(portsc & PORT_RESET) && (portsc & PORT_PE)) + return 0; + } + return -EBUSY; +} + +static int ehci_wait_for_port(int port) +{ + u32 status; + int ret, reps; + + for (reps = 0; reps < 3; reps++) { + dbgp_mdelay(100); + status = readl(&ehci_regs->status); + if (status & STS_PCD) { + ret = ehci_reset_port(port); + if (ret == 0) + return 0; + } + } + return -ENOTCONN; +} + +#ifdef DBGP_DEBUG +# define dbgp_printk early_printk +#else +static inline void dbgp_printk(const char *fmt, ...) { } +#endif + +typedef void (*set_debug_port_t)(int port); + +static void default_set_debug_port(int port) +{ +} + +static set_debug_port_t set_debug_port = default_set_debug_port; + +static void nvidia_set_debug_port(int port) +{ + u32 dword; + dword = read_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func, + 0x74); + dword &= ~(0x0f<<12); + dword |= ((port & 0x0f)<<12); + write_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func, 0x74, + dword); + dbgp_printk("set debug port to %d\n", port); +} + +static void __init detect_set_debug_port(void) +{ + u32 vendorid; + + vendorid = read_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func, + 0x00); + + if ((vendorid & 0xffff) == 0x10de) { + dbgp_printk("using nvidia set_debug_port\n"); + set_debug_port = nvidia_set_debug_port; + } +} + +static int __init ehci_setup(void) +{ + struct usb_debug_descriptor dbgp_desc; + u32 cmd, ctrl, status, portsc, hcs_params; + u32 debug_port, new_debug_port = 0, n_ports; + u32 devnum; + int ret, i; + int loop; + int port_map_tried; + int playtimes = 3; + +try_next_time: + port_map_tried = 0; + +try_next_port: + + hcs_params = readl(&ehci_caps->hcs_params); + debug_port = HCS_DEBUG_PORT(hcs_params); + n_ports = HCS_N_PORTS(hcs_params); + + dbgp_printk("debug_port: %d\n", debug_port); + dbgp_printk("n_ports: %d\n", n_ports); + + for (i = 1; i <= n_ports; i++) { + portsc = readl(&ehci_regs->port_status[i-1]); + dbgp_printk("portstatus%d: %08x\n", i, portsc); + } + + if (port_map_tried && (new_debug_port != debug_port)) { + if (--playtimes) { + set_debug_port(new_debug_port); + goto try_next_time; + } + return -1; + } + + loop = 10; + /* Reset the EHCI controller */ + cmd = readl(&ehci_regs->command); + cmd |= CMD_RESET; + writel(cmd, &ehci_regs->command); + do { + cmd = readl(&ehci_regs->command); + } while ((cmd & CMD_RESET) && (--loop > 0)); + + if (!loop) { + dbgp_printk("can not reset ehci\n"); + return -1; + } + dbgp_printk("ehci reset done\n"); + + /* Claim ownership, but do not enable yet */ + ctrl = readl(&ehci_debug->control); + ctrl |= DBGP_OWNER; + ctrl &= ~(DBGP_ENABLED | DBGP_INUSE); + writel(ctrl, &ehci_debug->control); + + /* Start the ehci running */ + cmd = readl(&ehci_regs->command); + cmd &= ~(CMD_LRESET | CMD_IAAD | CMD_PSE | CMD_ASE | CMD_RESET); + cmd |= CMD_RUN; + writel(cmd, &ehci_regs->command); + + /* Ensure everything is routed to the EHCI */ + writel(FLAG_CF, &ehci_regs->configured_flag); + + /* Wait until the controller is no longer halted */ + loop = 10; + do { + status = readl(&ehci_regs->status); + } while ((status & STS_HALT) && (--loop > 0)); + + if (!loop) { + dbgp_printk("ehci can be started\n"); + return -1; + } + dbgp_printk("ehci started\n"); + + /* Wait for a device to show up in the debug port */ + ret = ehci_wait_for_port(debug_port); + if (ret < 0) { + dbgp_printk("No device found in debug port\n"); + goto next_debug_port; + } + dbgp_printk("ehci wait for port done\n"); + + /* Enable the debug port */ + ctrl = readl(&ehci_debug->control); + ctrl |= DBGP_CLAIM; + writel(ctrl, &ehci_debug->control); + ctrl = readl(&ehci_debug->control); + if ((ctrl & DBGP_CLAIM) != DBGP_CLAIM) { + dbgp_printk("No device in debug port\n"); + writel(ctrl & ~DBGP_CLAIM, &ehci_debug->control); + goto err; + } + dbgp_printk("debug ported enabled\n"); + + /* Completely transfer the debug device to the debug controller */ + portsc = readl(&ehci_regs->port_status[debug_port - 1]); + portsc &= ~PORT_PE; + writel(portsc, &ehci_regs->port_status[debug_port - 1]); + + dbgp_mdelay(100); + + /* Find the debug device and make it device number 127 */ + for (devnum = 0; devnum <= 127; devnum++) { + ret = dbgp_control_msg(devnum, + USB_DIR_IN | USB_TYPE_STANDARD | USB_RECIP_DEVICE, + USB_REQ_GET_DESCRIPTOR, (USB_DT_DEBUG << 8), 0, + &dbgp_desc, sizeof(dbgp_desc)); + if (ret > 0) + break; + } + if (devnum > 127) { + dbgp_printk("Could not find attached debug device\n"); + goto err; + } + if (ret < 0) { + dbgp_printk("Attached device is not a debug device\n"); + goto err; + } + dbgp_endpoint_out = dbgp_desc.bDebugOutEndpoint; + + /* Move the device to 127 if it isn't already there */ + if (devnum != USB_DEBUG_DEVNUM) { + ret = dbgp_control_msg(devnum, + USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE, + USB_REQ_SET_ADDRESS, USB_DEBUG_DEVNUM, 0, NULL, 0); + if (ret < 0) { + dbgp_printk("Could not move attached device to %d\n", + USB_DEBUG_DEVNUM); + goto err; + } + devnum = USB_DEBUG_DEVNUM; + dbgp_printk("debug device renamed to 127\n"); + } + + /* Enable the debug interface */ + ret = dbgp_control_msg(USB_DEBUG_DEVNUM, + USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE, + USB_REQ_SET_FEATURE, USB_DEVICE_DEBUG_MODE, 0, NULL, 0); + if (ret < 0) { + dbgp_printk(" Could not enable the debug device\n"); + goto err; + } + dbgp_printk("debug interface enabled\n"); + + /* Perform a small write to get the even/odd data state in sync + */ + ret = dbgp_bulk_write(USB_DEBUG_DEVNUM, dbgp_endpoint_out, " ", 1); + if (ret < 0) { + dbgp_printk("dbgp_bulk_write failed: %d\n", ret); + goto err; + } + dbgp_printk("small write doned\n"); + + return 0; +err: + /* Things didn't work so remove my claim */ + ctrl = readl(&ehci_debug->control); + ctrl &= ~(DBGP_CLAIM | DBGP_OUT); + writel(ctrl, &ehci_debug->control); + return -1; + +next_debug_port: + port_map_tried |= (1<<(debug_port - 1)); + new_debug_port = ((debug_port-1+1)%n_ports) + 1; + if (port_map_tried != ((1<> 29) & 0x7; + bar = (bar * 4) + 0xc; + offset = (debug_port >> 16) & 0xfff; + dbgp_printk("bar: %02x offset: %03x\n", bar, offset); + if (bar != PCI_BASE_ADDRESS_0) { + dbgp_printk("only debug ports on bar 1 handled.\n"); + + return -1; + } + + bar_val = read_pci_config(bus, slot, func, PCI_BASE_ADDRESS_0); + dbgp_printk("bar_val: %02x offset: %03x\n", bar_val, offset); + if (bar_val & ~PCI_BASE_ADDRESS_MEM_MASK) { + dbgp_printk("only simple 32bit mmio bars supported\n"); + + return -1; + } + + /* double check if the mem space is enabled */ + byte = read_pci_config_byte(bus, slot, func, 0x04); + if (!(byte & 0x2)) { + byte |= 0x02; + write_pci_config_byte(bus, slot, func, 0x04, byte); + dbgp_printk("mmio for ehci enabled\n"); + } + + /* + * FIXME I don't have the bar size so just guess PAGE_SIZE is more + * than enough. 1K is the biggest I have seen. + */ + set_fixmap_nocache(FIX_DBGP_BASE, bar_val & PAGE_MASK); + ehci_bar = (void __iomem *)__fix_to_virt(FIX_DBGP_BASE); + ehci_bar += bar_val & ~PAGE_MASK; + dbgp_printk("ehci_bar: %p\n", ehci_bar); + + ehci_caps = ehci_bar; + ehci_regs = ehci_bar + HC_LENGTH(readl(&ehci_caps->hc_capbase)); + ehci_debug = ehci_bar + offset; + ehci_dev.bus = bus; + ehci_dev.slot = slot; + ehci_dev.func = func; + + detect_set_debug_port(); + + ret = ehci_setup(); + if (ret < 0) { + dbgp_printk("ehci_setup failed\n"); + ehci_debug = 0; + + return -1; + } + + return 0; +} + +static void early_dbgp_write(struct console *con, const char *str, u32 n) +{ + int chunk, ret; + + if (!ehci_debug) + return; + while (n > 0) { + chunk = n; + if (chunk > DBGP_MAX_PACKET) + chunk = DBGP_MAX_PACKET; + ret = dbgp_bulk_write(USB_DEBUG_DEVNUM, + dbgp_endpoint_out, str, chunk); + str += chunk; + n -= chunk; + } +} + +static struct console early_dbgp_console = { + .name = "earlydbg", + .write = early_dbgp_write, + .flags = CON_PRINTBUFFER, + .index = -1, +}; +#endif + /* Console interface to a host file on AMD's SimNow! */ static int simnow_fd; @@ -165,6 +889,7 @@ enum { static noinline long simnow(long cmd, long a, long b, long c) { long ret; + asm volatile("cpuid" : "=a" (ret) : "b" (a), "c" (b), "d" (c), "0" (MAGIC1), "D" (cmd + MAGIC2)); @@ -174,6 +899,7 @@ static noinline long simnow(long cmd, long a, long b, long c) static void __init simnow_init(char *str) { char *fn = "klog"; + if (*str == '=') fn = ++str; /* error ignored */ @@ -208,10 +934,11 @@ asmlinkage void early_printk(const char *fmt, ...) va_end(ap); } -static int __initdata keep_early; static int __init setup_early_printk(char *buf) { + int keep_early; + if (!buf) return 0; @@ -219,8 +946,7 @@ static int __init setup_early_printk(char *buf) return 0; early_console_initialized = 1; - if (strstr(buf, "keep")) - keep_early = 1; + keep_early = (strstr(buf, "keep") != NULL); if (!strncmp(buf, "serial", 6)) { early_serial_init(buf + 6); @@ -238,6 +964,17 @@ static int __init setup_early_printk(char *buf) simnow_init(buf + 6); early_console = &simnow_console; keep_early = 1; +#ifdef CONFIG_EARLY_PRINTK_DBGP + } else if (!strncmp(buf, "dbgp", 4)) { + if (early_dbgp_init(buf+4) < 0) + return 0; + early_console = &early_dbgp_console; + /* + * usb subsys will reset ehci controller, so don't keep + * that early console + */ + keep_early = 0; +#endif #ifdef CONFIG_HVC_XEN } else if (!strncmp(buf, "xen", 3)) { early_console = &xenboot_console; @@ -251,4 +988,23 @@ static int __init setup_early_printk(char *buf) register_console(early_console); return 0; } + +void __init enable_debug_console(char *buf) +{ +#ifdef DBGP_DEBUG + struct console *old_early_console = NULL; + + if (early_console_initialized && early_console) { + old_early_console = early_console; + unregister_console(early_console); + early_console_initialized = 0; + } + + setup_early_printk(buf); + + if (early_console == old_early_console && old_early_console) + register_console(old_early_console); +#endif +} + early_param("earlyprintk", setup_early_printk); -- cgit v0.10.2 From a4dbc34d181e87a0d724dee365921e9251f831d4 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 25 Jul 2008 02:14:28 -0700 Subject: x86: add setup_ioapic_ids for numaq in x86_quirks Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 98e4db53..72ba063 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include @@ -1728,10 +1729,8 @@ static void __init setup_ioapic_ids_from_mpc(void) unsigned char old_id; unsigned long flags; -#ifdef CONFIG_X86_NUMAQ - if (found_numaq) + if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids()) return; -#endif /* * Don't check I/O APIC IDs for xAPIC systems. They have diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index b8c4561..2434467 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c @@ -229,6 +229,12 @@ static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable, } } +static int __init numaq_setup_ioapic_ids(void) +{ + /* so can skip it */ + return 1; +} + static struct x86_quirks numaq_x86_quirks __initdata = { .arch_pre_time_init = numaq_pre_time_init, .arch_time_init = NULL, @@ -243,6 +249,7 @@ static struct x86_quirks numaq_x86_quirks __initdata = { .mpc_oem_bus_info = mpc_oem_bus_info, .mpc_oem_pci_bus = mpc_oem_pci_bus, .smp_read_mpc_oem = smp_read_mpc_oem, + .setup_ioapic_ids = numaq_setup_ioapic_ids, }; void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, diff --git a/include/asm-x86/setup.h b/include/asm-x86/setup.h index b7aeba9..80fcc0d 100644 --- a/include/asm-x86/setup.h +++ b/include/asm-x86/setup.h @@ -38,6 +38,7 @@ struct x86_quirks { void (*mpc_oem_pci_bus)(struct mpc_config_bus *m); void (*smp_read_mpc_oem)(struct mp_config_oemtable *oemtable, unsigned short oemsize); + int (*setup_ioapic_ids)(void); }; extern struct x86_quirks *x86_quirks; -- cgit v0.10.2 From 1176fa919292887aa738d317d61fe2bba4d764f2 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 25 Jul 2008 02:17:21 -0700 Subject: x86: mach-bigsmp to bigsmp Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index b31f2800..df37fc9 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -12,10 +12,10 @@ #include #include #include -#include +#include #include -#include -#include +#include +#include #include static int dmi_bigsmp; /* can be set by dmi scanners */ diff --git a/include/asm-x86/bigsmp/apic.h b/include/asm-x86/bigsmp/apic.h new file mode 100644 index 0000000..0a9cd7c --- /dev/null +++ b/include/asm-x86/bigsmp/apic.h @@ -0,0 +1,144 @@ +#ifndef __ASM_MACH_APIC_H +#define __ASM_MACH_APIC_H + +#define xapic_phys_to_log_apicid(cpu) (per_cpu(x86_bios_cpu_apicid, cpu)) +#define esr_disable (1) + +static inline int apic_id_registered(void) +{ + return (1); +} + +/* Round robin the irqs amoung the online cpus */ +static inline cpumask_t target_cpus(void) +{ + static unsigned long cpu = NR_CPUS; + do { + if (cpu >= NR_CPUS) + cpu = first_cpu(cpu_online_map); + else + cpu = next_cpu(cpu, cpu_online_map); + } while (cpu >= NR_CPUS); + return cpumask_of_cpu(cpu); +} + +#undef APIC_DEST_LOGICAL +#define APIC_DEST_LOGICAL 0 +#define TARGET_CPUS (target_cpus()) +#define APIC_DFR_VALUE (APIC_DFR_FLAT) +#define INT_DELIVERY_MODE (dest_Fixed) +#define INT_DEST_MODE (0) /* phys delivery to target proc */ +#define NO_BALANCE_IRQ (0) +#define WAKE_SECONDARY_VIA_INIT + + +static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) +{ + return (0); +} + +static inline unsigned long check_apicid_present(int bit) +{ + return (1); +} + +static inline unsigned long calculate_ldr(int cpu) +{ + unsigned long val, id; + val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; + id = xapic_phys_to_log_apicid(cpu); + val |= SET_APIC_LOGICAL_ID(id); + return val; +} + +/* + * Set up the logical destination ID. + * + * Intel recommends to set DFR, LDR and TPR before enabling + * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel + * document number 292116). So here it goes... + */ +static inline void init_apic_ldr(void) +{ + unsigned long val; + int cpu = smp_processor_id(); + + apic_write(APIC_DFR, APIC_DFR_VALUE); + val = calculate_ldr(cpu); + apic_write(APIC_LDR, val); +} + +static inline void setup_apic_routing(void) +{ + printk("Enabling APIC mode: %s. Using %d I/O APICs\n", + "Physflat", nr_ioapics); +} + +static inline int multi_timer_check(int apic, int irq) +{ + return (0); +} + +static inline int apicid_to_node(int logical_apicid) +{ + return apicid_2_node[hard_smp_processor_id()]; +} + +static inline int cpu_present_to_apicid(int mps_cpu) +{ + if (mps_cpu < NR_CPUS) + return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); + + return BAD_APICID; +} + +static inline physid_mask_t apicid_to_cpu_present(int phys_apicid) +{ + return physid_mask_of_physid(phys_apicid); +} + +extern u8 cpu_2_logical_apicid[]; +/* Mapping from cpu number to logical apicid */ +static inline int cpu_to_logical_apicid(int cpu) +{ + if (cpu >= NR_CPUS) + return BAD_APICID; + return cpu_physical_id(cpu); +} + +static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_map) +{ + /* For clustered we don't have a good way to do this yet - hack */ + return physids_promote(0xFFL); +} + +static inline void setup_portio_remap(void) +{ +} + +static inline void enable_apic_mode(void) +{ +} + +static inline int check_phys_apicid_present(int boot_cpu_physical_apicid) +{ + return (1); +} + +/* As we are using single CPU as destination, pick only one CPU here */ +static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) +{ + int cpu; + int apicid; + + cpu = first_cpu(cpumask); + apicid = cpu_to_logical_apicid(cpu); + return apicid; +} + +static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) +{ + return cpuid_apic >> index_msb; +} + +#endif /* __ASM_MACH_APIC_H */ diff --git a/include/asm-x86/bigsmp/apicdef.h b/include/asm-x86/bigsmp/apicdef.h new file mode 100644 index 0000000..392c3f5 --- /dev/null +++ b/include/asm-x86/bigsmp/apicdef.h @@ -0,0 +1,13 @@ +#ifndef __ASM_MACH_APICDEF_H +#define __ASM_MACH_APICDEF_H + +#define APIC_ID_MASK (0xFF<<24) + +static inline unsigned get_apic_id(unsigned long x) +{ + return (((x)>>24)&0xFF); +} + +#define GET_APIC_ID(x) get_apic_id(x) + +#endif diff --git a/include/asm-x86/bigsmp/ipi.h b/include/asm-x86/bigsmp/ipi.h new file mode 100644 index 0000000..9404c53 --- /dev/null +++ b/include/asm-x86/bigsmp/ipi.h @@ -0,0 +1,25 @@ +#ifndef __ASM_MACH_IPI_H +#define __ASM_MACH_IPI_H + +void send_IPI_mask_sequence(cpumask_t mask, int vector); + +static inline void send_IPI_mask(cpumask_t mask, int vector) +{ + send_IPI_mask_sequence(mask, vector); +} + +static inline void send_IPI_allbutself(int vector) +{ + cpumask_t mask = cpu_online_map; + cpu_clear(smp_processor_id(), mask); + + if (!cpus_empty(mask)) + send_IPI_mask(mask, vector); +} + +static inline void send_IPI_all(int vector) +{ + send_IPI_mask(cpu_online_map, vector); +} + +#endif /* __ASM_MACH_IPI_H */ diff --git a/include/asm-x86/mach-bigsmp/mach_apic.h b/include/asm-x86/mach-bigsmp/mach_apic.h deleted file mode 100644 index 05362d4..0000000 --- a/include/asm-x86/mach-bigsmp/mach_apic.h +++ /dev/null @@ -1,144 +0,0 @@ -#ifndef ASM_X86__MACH_BIGSMP__MACH_APIC_H -#define ASM_X86__MACH_BIGSMP__MACH_APIC_H - -#define xapic_phys_to_log_apicid(cpu) (per_cpu(x86_bios_cpu_apicid, cpu)) -#define esr_disable (1) - -static inline int apic_id_registered(void) -{ - return (1); -} - -/* Round robin the irqs amoung the online cpus */ -static inline cpumask_t target_cpus(void) -{ - static unsigned long cpu = NR_CPUS; - do { - if (cpu >= NR_CPUS) - cpu = first_cpu(cpu_online_map); - else - cpu = next_cpu(cpu, cpu_online_map); - } while (cpu >= NR_CPUS); - return cpumask_of_cpu(cpu); -} - -#undef APIC_DEST_LOGICAL -#define APIC_DEST_LOGICAL 0 -#define TARGET_CPUS (target_cpus()) -#define APIC_DFR_VALUE (APIC_DFR_FLAT) -#define INT_DELIVERY_MODE (dest_Fixed) -#define INT_DEST_MODE (0) /* phys delivery to target proc */ -#define NO_BALANCE_IRQ (0) -#define WAKE_SECONDARY_VIA_INIT - - -static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) -{ - return (0); -} - -static inline unsigned long check_apicid_present(int bit) -{ - return (1); -} - -static inline unsigned long calculate_ldr(int cpu) -{ - unsigned long val, id; - val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; - id = xapic_phys_to_log_apicid(cpu); - val |= SET_APIC_LOGICAL_ID(id); - return val; -} - -/* - * Set up the logical destination ID. - * - * Intel recommends to set DFR, LDR and TPR before enabling - * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel - * document number 292116). So here it goes... - */ -static inline void init_apic_ldr(void) -{ - unsigned long val; - int cpu = smp_processor_id(); - - apic_write(APIC_DFR, APIC_DFR_VALUE); - val = calculate_ldr(cpu); - apic_write(APIC_LDR, val); -} - -static inline void setup_apic_routing(void) -{ - printk("Enabling APIC mode: %s. Using %d I/O APICs\n", - "Physflat", nr_ioapics); -} - -static inline int multi_timer_check(int apic, int irq) -{ - return (0); -} - -static inline int apicid_to_node(int logical_apicid) -{ - return apicid_2_node[hard_smp_processor_id()]; -} - -static inline int cpu_present_to_apicid(int mps_cpu) -{ - if (mps_cpu < NR_CPUS) - return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); - - return BAD_APICID; -} - -static inline physid_mask_t apicid_to_cpu_present(int phys_apicid) -{ - return physid_mask_of_physid(phys_apicid); -} - -extern u8 cpu_2_logical_apicid[]; -/* Mapping from cpu number to logical apicid */ -static inline int cpu_to_logical_apicid(int cpu) -{ - if (cpu >= NR_CPUS) - return BAD_APICID; - return cpu_physical_id(cpu); -} - -static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_map) -{ - /* For clustered we don't have a good way to do this yet - hack */ - return physids_promote(0xFFL); -} - -static inline void setup_portio_remap(void) -{ -} - -static inline void enable_apic_mode(void) -{ -} - -static inline int check_phys_apicid_present(int boot_cpu_physical_apicid) -{ - return (1); -} - -/* As we are using single CPU as destination, pick only one CPU here */ -static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) -{ - int cpu; - int apicid; - - cpu = first_cpu(cpumask); - apicid = cpu_to_logical_apicid(cpu); - return apicid; -} - -static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) -{ - return cpuid_apic >> index_msb; -} - -#endif /* ASM_X86__MACH_BIGSMP__MACH_APIC_H */ diff --git a/include/asm-x86/mach-bigsmp/mach_apicdef.h b/include/asm-x86/mach-bigsmp/mach_apicdef.h deleted file mode 100644 index 811935d..0000000 --- a/include/asm-x86/mach-bigsmp/mach_apicdef.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef ASM_X86__MACH_BIGSMP__MACH_APICDEF_H -#define ASM_X86__MACH_BIGSMP__MACH_APICDEF_H - -#define APIC_ID_MASK (0xFF<<24) - -static inline unsigned get_apic_id(unsigned long x) -{ - return (((x)>>24)&0xFF); -} - -#define GET_APIC_ID(x) get_apic_id(x) - -#endif /* ASM_X86__MACH_BIGSMP__MACH_APICDEF_H */ diff --git a/include/asm-x86/mach-bigsmp/mach_ipi.h b/include/asm-x86/mach-bigsmp/mach_ipi.h deleted file mode 100644 index b1b0f96..0000000 --- a/include/asm-x86/mach-bigsmp/mach_ipi.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef ASM_X86__MACH_BIGSMP__MACH_IPI_H -#define ASM_X86__MACH_BIGSMP__MACH_IPI_H - -void send_IPI_mask_sequence(cpumask_t mask, int vector); - -static inline void send_IPI_mask(cpumask_t mask, int vector) -{ - send_IPI_mask_sequence(mask, vector); -} - -static inline void send_IPI_allbutself(int vector) -{ - cpumask_t mask = cpu_online_map; - cpu_clear(smp_processor_id(), mask); - - if (!cpus_empty(mask)) - send_IPI_mask(mask, vector); -} - -static inline void send_IPI_all(int vector) -{ - send_IPI_mask(cpu_online_map, vector); -} - -#endif /* ASM_X86__MACH_BIGSMP__MACH_IPI_H */ -- cgit v0.10.2 From c7e7964c9828083aeb41c2664cbf5a32acbfa9d1 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 25 Jul 2008 02:17:33 -0700 Subject: x86: mach_es7000 to es7000 Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar diff --git a/arch/x86/es7000/Makefile b/arch/x86/es7000/Makefile new file mode 100644 index 0000000..3ef8b43 --- /dev/null +++ b/arch/x86/es7000/Makefile @@ -0,0 +1,5 @@ +# +# Makefile for the linux kernel. +# + +obj-$(CONFIG_X86_ES7000) := es7000plat.o diff --git a/arch/x86/es7000/es7000.h b/arch/x86/es7000/es7000.h new file mode 100644 index 0000000..4e62f6f --- /dev/null +++ b/arch/x86/es7000/es7000.h @@ -0,0 +1,114 @@ +/* + * Written by: Garry Forsgren, Unisys Corporation + * Natalie Protasevich, Unisys Corporation + * This file contains the code to configure and interface + * with Unisys ES7000 series hardware system manager. + * + * Copyright (c) 2003 Unisys Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Unisys Corporation, Township Line & Union Meeting + * Roads-A, Unisys Way, Blue Bell, Pennsylvania, 19424, or: + * + * http://www.unisys.com + */ + +/* + * ES7000 chipsets + */ + +#define NON_UNISYS 0 +#define ES7000_CLASSIC 1 +#define ES7000_ZORRO 2 + + +#define MIP_REG 1 +#define MIP_PSAI_REG 4 + +#define MIP_BUSY 1 +#define MIP_SPIN 0xf0000 +#define MIP_VALID 0x0100000000000000ULL +#define MIP_PORT(VALUE) ((VALUE >> 32) & 0xffff) + +#define MIP_RD_LO(VALUE) (VALUE & 0xffffffff) + +struct mip_reg_info { + unsigned long long mip_info; + unsigned long long delivery_info; + unsigned long long host_reg; + unsigned long long mip_reg; +}; + +struct part_info { + unsigned char type; + unsigned char length; + unsigned char part_id; + unsigned char apic_mode; + unsigned long snum; + char ptype[16]; + char sname[64]; + char pname[64]; +}; + +struct psai { + unsigned long long entry_type; + unsigned long long addr; + unsigned long long bep_addr; +}; + +struct es7000_mem_info { + unsigned char type; + unsigned char length; + unsigned char resv[6]; + unsigned long long start; + unsigned long long size; +}; + +struct es7000_oem_table { + unsigned long long hdr; + struct mip_reg_info mip; + struct part_info pif; + struct es7000_mem_info shm; + struct psai psai; +}; + +#ifdef CONFIG_ACPI + +struct oem_table { + struct acpi_table_header Header; + u32 OEMTableAddr; + u32 OEMTableSize; +}; + +extern int find_unisys_acpi_oem_table(unsigned long *oem_addr); +#endif + +struct mip_reg { + unsigned long long off_0; + unsigned long long off_8; + unsigned long long off_10; + unsigned long long off_18; + unsigned long long off_20; + unsigned long long off_28; + unsigned long long off_30; + unsigned long long off_38; +}; + +#define MIP_SW_APIC 0x1020b +#define MIP_FUNC(VALUE) (VALUE & 0xff) + +extern int parse_unisys_oem (char *oemptr); +extern void setup_unisys(void); +extern int es7000_start_cpu(int cpu, unsigned long eip); +extern void es7000_sw_apic(void); diff --git a/arch/x86/es7000/es7000plat.c b/arch/x86/es7000/es7000plat.c new file mode 100644 index 0000000..7789fde --- /dev/null +++ b/arch/x86/es7000/es7000plat.c @@ -0,0 +1,262 @@ +/* + * Written by: Garry Forsgren, Unisys Corporation + * Natalie Protasevich, Unisys Corporation + * This file contains the code to configure and interface + * with Unisys ES7000 series hardware system manager. + * + * Copyright (c) 2003 Unisys Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Unisys Corporation, Township Line & Union Meeting + * Roads-A, Unisys Way, Blue Bell, Pennsylvania, 19424, or: + * + * http://www.unisys.com + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "es7000.h" +#include + +/* + * ES7000 Globals + */ + +static volatile unsigned long *psai = NULL; +static struct mip_reg *mip_reg; +static struct mip_reg *host_reg; +static int mip_port; +static unsigned long mip_addr, host_addr; + +int es7000_plat; + +/* + * GSI override for ES7000 platforms. + */ + +static unsigned int base; + +static int +es7000_rename_gsi(int ioapic, int gsi) +{ + if (es7000_plat == ES7000_ZORRO) + return gsi; + + if (!base) { + int i; + for (i = 0; i < nr_ioapics; i++) + base += nr_ioapic_registers[i]; + } + + if (!ioapic && (gsi < 16)) + gsi += base; + return gsi; +} + +void __init +setup_unisys(void) +{ + /* + * Determine the generation of the ES7000 currently running. + * + * es7000_plat = 1 if the machine is a 5xx ES7000 box + * es7000_plat = 2 if the machine is a x86_64 ES7000 box + * + */ + if (!(boot_cpu_data.x86 <= 15 && boot_cpu_data.x86_model <= 2)) + es7000_plat = ES7000_ZORRO; + else + es7000_plat = ES7000_CLASSIC; + ioapic_renumber_irq = es7000_rename_gsi; +} + +/* + * Parse the OEM Table + */ + +int __init +parse_unisys_oem (char *oemptr) +{ + int i; + int success = 0; + unsigned char type, size; + unsigned long val; + char *tp = NULL; + struct psai *psaip = NULL; + struct mip_reg_info *mi; + struct mip_reg *host, *mip; + + tp = oemptr; + + tp += 8; + + for (i=0; i <= 6; i++) { + type = *tp++; + size = *tp++; + tp -= 2; + switch (type) { + case MIP_REG: + mi = (struct mip_reg_info *)tp; + val = MIP_RD_LO(mi->host_reg); + host_addr = val; + host = (struct mip_reg *)val; + host_reg = __va(host); + val = MIP_RD_LO(mi->mip_reg); + mip_port = MIP_PORT(mi->mip_info); + mip_addr = val; + mip = (struct mip_reg *)val; + mip_reg = __va(mip); + pr_debug("es7000_mipcfg: host_reg = 0x%lx \n", + (unsigned long)host_reg); + pr_debug("es7000_mipcfg: mip_reg = 0x%lx \n", + (unsigned long)mip_reg); + success++; + break; + case MIP_PSAI_REG: + psaip = (struct psai *)tp; + if (tp != NULL) { + if (psaip->addr) + psai = __va(psaip->addr); + else + psai = NULL; + success++; + } + break; + default: + break; + } + tp += size; + } + + if (success < 2) { + es7000_plat = NON_UNISYS; + } else + setup_unisys(); + return es7000_plat; +} + +#ifdef CONFIG_ACPI +int __init +find_unisys_acpi_oem_table(unsigned long *oem_addr) +{ + struct acpi_table_header *header = NULL; + int i = 0; + while (ACPI_SUCCESS(acpi_get_table("OEM1", i++, &header))) { + if (!memcmp((char *) &header->oem_id, "UNISYS", 6)) { + struct oem_table *t = (struct oem_table *)header; + *oem_addr = (unsigned long)__acpi_map_table(t->OEMTableAddr, + t->OEMTableSize); + return 0; + } + } + return -1; +} +#endif + +static void +es7000_spin(int n) +{ + int i = 0; + + while (i++ < n) + rep_nop(); +} + +static int __init +es7000_mip_write(struct mip_reg *mip_reg) +{ + int status = 0; + int spin; + + spin = MIP_SPIN; + while (((unsigned long long)host_reg->off_38 & + (unsigned long long)MIP_VALID) != 0) { + if (--spin <= 0) { + printk("es7000_mip_write: Timeout waiting for Host Valid Flag"); + return -1; + } + es7000_spin(MIP_SPIN); + } + + memcpy(host_reg, mip_reg, sizeof(struct mip_reg)); + outb(1, mip_port); + + spin = MIP_SPIN; + + while (((unsigned long long)mip_reg->off_38 & + (unsigned long long)MIP_VALID) == 0) { + if (--spin <= 0) { + printk("es7000_mip_write: Timeout waiting for MIP Valid Flag"); + return -1; + } + es7000_spin(MIP_SPIN); + } + + status = ((unsigned long long)mip_reg->off_0 & + (unsigned long long)0xffff0000000000ULL) >> 48; + mip_reg->off_38 = ((unsigned long long)mip_reg->off_38 & + (unsigned long long)~MIP_VALID); + return status; +} + +int +es7000_start_cpu(int cpu, unsigned long eip) +{ + unsigned long vect = 0, psaival = 0; + + if (psai == NULL) + return -1; + + vect = ((unsigned long)__pa(eip)/0x1000) << 16; + psaival = (0x1000000 | vect | cpu); + + while (*psai & 0x1000000) + ; + + *psai = psaival; + + return 0; + +} + +void __init +es7000_sw_apic(void) +{ + if (es7000_plat) { + int mip_status; + struct mip_reg es7000_mip_reg; + + printk("ES7000: Enabling APIC mode.\n"); + memset(&es7000_mip_reg, 0, sizeof(struct mip_reg)); + es7000_mip_reg.off_0 = MIP_SW_APIC; + es7000_mip_reg.off_38 = (MIP_VALID); + while ((mip_status = es7000_mip_write(&es7000_mip_reg)) != 0) + printk("es7000_sw_apic: command failed, status = %x\n", + mip_status); + return; + } +} diff --git a/arch/x86/mach-es7000/Makefile b/arch/x86/mach-es7000/Makefile deleted file mode 100644 index 3ef8b43..0000000 --- a/arch/x86/mach-es7000/Makefile +++ /dev/null @@ -1,5 +0,0 @@ -# -# Makefile for the linux kernel. -# - -obj-$(CONFIG_X86_ES7000) := es7000plat.o diff --git a/arch/x86/mach-es7000/es7000.h b/arch/x86/mach-es7000/es7000.h deleted file mode 100644 index c8d5aa1..0000000 --- a/arch/x86/mach-es7000/es7000.h +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Written by: Garry Forsgren, Unisys Corporation - * Natalie Protasevich, Unisys Corporation - * This file contains the code to configure and interface - * with Unisys ES7000 series hardware system manager. - * - * Copyright (c) 2003 Unisys Corporation. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write the Free Software Foundation, Inc., 59 - * Temple Place - Suite 330, Boston MA 02111-1307, USA. - * - * Contact information: Unisys Corporation, Township Line & Union Meeting - * Roads-A, Unisys Way, Blue Bell, Pennsylvania, 19424, or: - * - * http://www.unisys.com - */ - -/* - * ES7000 chipsets - */ - -#define NON_UNISYS 0 -#define ES7000_CLASSIC 1 -#define ES7000_ZORRO 2 - - -#define MIP_REG 1 -#define MIP_PSAI_REG 4 - -#define MIP_BUSY 1 -#define MIP_SPIN 0xf0000 -#define MIP_VALID 0x0100000000000000ULL -#define MIP_PORT(VALUE) ((VALUE >> 32) & 0xffff) - -#define MIP_RD_LO(VALUE) (VALUE & 0xffffffff) - -struct mip_reg_info { - unsigned long long mip_info; - unsigned long long delivery_info; - unsigned long long host_reg; - unsigned long long mip_reg; -}; - -struct part_info { - unsigned char type; - unsigned char length; - unsigned char part_id; - unsigned char apic_mode; - unsigned long snum; - char ptype[16]; - char sname[64]; - char pname[64]; -}; - -struct psai { - unsigned long long entry_type; - unsigned long long addr; - unsigned long long bep_addr; -}; - -struct es7000_mem_info { - unsigned char type; - unsigned char length; - unsigned char resv[6]; - unsigned long long start; - unsigned long long size; -}; - -struct es7000_oem_table { - unsigned long long hdr; - struct mip_reg_info mip; - struct part_info pif; - struct es7000_mem_info shm; - struct psai psai; -}; - -#ifdef CONFIG_ACPI - -struct oem_table { - struct acpi_table_header Header; - u32 OEMTableAddr; - u32 OEMTableSize; -}; - -extern int find_unisys_acpi_oem_table(unsigned long *oem_addr); -#endif - -struct mip_reg { - unsigned long long off_0; - unsigned long long off_8; - unsigned long long off_10; - unsigned long long off_18; - unsigned long long off_20; - unsigned long long off_28; - unsigned long long off_30; - unsigned long long off_38; -}; - -#define MIP_SW_APIC 0x1020b -#define MIP_FUNC(VALUE) (VALUE & 0xff) - -extern int parse_unisys_oem (char *oemptr); -extern void setup_unisys(void); -extern int es7000_start_cpu(int cpu, unsigned long eip); -extern void es7000_sw_apic(void); diff --git a/arch/x86/mach-es7000/es7000plat.c b/arch/x86/mach-es7000/es7000plat.c deleted file mode 100644 index 50189af..0000000 --- a/arch/x86/mach-es7000/es7000plat.c +++ /dev/null @@ -1,262 +0,0 @@ -/* - * Written by: Garry Forsgren, Unisys Corporation - * Natalie Protasevich, Unisys Corporation - * This file contains the code to configure and interface - * with Unisys ES7000 series hardware system manager. - * - * Copyright (c) 2003 Unisys Corporation. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write the Free Software Foundation, Inc., 59 - * Temple Place - Suite 330, Boston MA 02111-1307, USA. - * - * Contact information: Unisys Corporation, Township Line & Union Meeting - * Roads-A, Unisys Way, Blue Bell, Pennsylvania, 19424, or: - * - * http://www.unisys.com - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "es7000.h" -#include - -/* - * ES7000 Globals - */ - -static volatile unsigned long *psai = NULL; -static struct mip_reg *mip_reg; -static struct mip_reg *host_reg; -static int mip_port; -static unsigned long mip_addr, host_addr; - -int es7000_plat; - -/* - * GSI override for ES7000 platforms. - */ - -static unsigned int base; - -static int -es7000_rename_gsi(int ioapic, int gsi) -{ - if (es7000_plat == ES7000_ZORRO) - return gsi; - - if (!base) { - int i; - for (i = 0; i < nr_ioapics; i++) - base += nr_ioapic_registers[i]; - } - - if (!ioapic && (gsi < 16)) - gsi += base; - return gsi; -} - -void __init -setup_unisys(void) -{ - /* - * Determine the generation of the ES7000 currently running. - * - * es7000_plat = 1 if the machine is a 5xx ES7000 box - * es7000_plat = 2 if the machine is a x86_64 ES7000 box - * - */ - if (!(boot_cpu_data.x86 <= 15 && boot_cpu_data.x86_model <= 2)) - es7000_plat = ES7000_ZORRO; - else - es7000_plat = ES7000_CLASSIC; - ioapic_renumber_irq = es7000_rename_gsi; -} - -/* - * Parse the OEM Table - */ - -int __init -parse_unisys_oem (char *oemptr) -{ - int i; - int success = 0; - unsigned char type, size; - unsigned long val; - char *tp = NULL; - struct psai *psaip = NULL; - struct mip_reg_info *mi; - struct mip_reg *host, *mip; - - tp = oemptr; - - tp += 8; - - for (i=0; i <= 6; i++) { - type = *tp++; - size = *tp++; - tp -= 2; - switch (type) { - case MIP_REG: - mi = (struct mip_reg_info *)tp; - val = MIP_RD_LO(mi->host_reg); - host_addr = val; - host = (struct mip_reg *)val; - host_reg = __va(host); - val = MIP_RD_LO(mi->mip_reg); - mip_port = MIP_PORT(mi->mip_info); - mip_addr = val; - mip = (struct mip_reg *)val; - mip_reg = __va(mip); - pr_debug("es7000_mipcfg: host_reg = 0x%lx \n", - (unsigned long)host_reg); - pr_debug("es7000_mipcfg: mip_reg = 0x%lx \n", - (unsigned long)mip_reg); - success++; - break; - case MIP_PSAI_REG: - psaip = (struct psai *)tp; - if (tp != NULL) { - if (psaip->addr) - psai = __va(psaip->addr); - else - psai = NULL; - success++; - } - break; - default: - break; - } - tp += size; - } - - if (success < 2) { - es7000_plat = NON_UNISYS; - } else - setup_unisys(); - return es7000_plat; -} - -#ifdef CONFIG_ACPI -int __init -find_unisys_acpi_oem_table(unsigned long *oem_addr) -{ - struct acpi_table_header *header = NULL; - int i = 0; - while (ACPI_SUCCESS(acpi_get_table("OEM1", i++, &header))) { - if (!memcmp((char *) &header->oem_id, "UNISYS", 6)) { - struct oem_table *t = (struct oem_table *)header; - *oem_addr = (unsigned long)__acpi_map_table(t->OEMTableAddr, - t->OEMTableSize); - return 0; - } - } - return -1; -} -#endif - -static void -es7000_spin(int n) -{ - int i = 0; - - while (i++ < n) - rep_nop(); -} - -static int __init -es7000_mip_write(struct mip_reg *mip_reg) -{ - int status = 0; - int spin; - - spin = MIP_SPIN; - while (((unsigned long long)host_reg->off_38 & - (unsigned long long)MIP_VALID) != 0) { - if (--spin <= 0) { - printk("es7000_mip_write: Timeout waiting for Host Valid Flag"); - return -1; - } - es7000_spin(MIP_SPIN); - } - - memcpy(host_reg, mip_reg, sizeof(struct mip_reg)); - outb(1, mip_port); - - spin = MIP_SPIN; - - while (((unsigned long long)mip_reg->off_38 & - (unsigned long long)MIP_VALID) == 0) { - if (--spin <= 0) { - printk("es7000_mip_write: Timeout waiting for MIP Valid Flag"); - return -1; - } - es7000_spin(MIP_SPIN); - } - - status = ((unsigned long long)mip_reg->off_0 & - (unsigned long long)0xffff0000000000ULL) >> 48; - mip_reg->off_38 = ((unsigned long long)mip_reg->off_38 & - (unsigned long long)~MIP_VALID); - return status; -} - -int -es7000_start_cpu(int cpu, unsigned long eip) -{ - unsigned long vect = 0, psaival = 0; - - if (psai == NULL) - return -1; - - vect = ((unsigned long)__pa(eip)/0x1000) << 16; - psaival = (0x1000000 | vect | cpu); - - while (*psai & 0x1000000) - ; - - *psai = psaival; - - return 0; - -} - -void __init -es7000_sw_apic(void) -{ - if (es7000_plat) { - int mip_status; - struct mip_reg es7000_mip_reg; - - printk("ES7000: Enabling APIC mode.\n"); - memset(&es7000_mip_reg, 0, sizeof(struct mip_reg)); - es7000_mip_reg.off_0 = MIP_SW_APIC; - es7000_mip_reg.off_38 = (MIP_VALID); - while ((mip_status = es7000_mip_write(&es7000_mip_reg)) != 0) - printk("es7000_sw_apic: command failed, status = %x\n", - mip_status); - return; - } -} diff --git a/arch/x86/mach-generic/Makefile b/arch/x86/mach-generic/Makefile index 0dbd780..4706de7 100644 --- a/arch/x86/mach-generic/Makefile +++ b/arch/x86/mach-generic/Makefile @@ -9,4 +9,4 @@ obj-$(CONFIG_X86_NUMAQ) += numaq.o obj-$(CONFIG_X86_SUMMIT) += summit.o obj-$(CONFIG_X86_BIGSMP) += bigsmp.o obj-$(CONFIG_X86_ES7000) += es7000.o -obj-$(CONFIG_X86_ES7000) += ../../x86/mach-es7000/ +obj-$(CONFIG_X86_ES7000) += ../../x86/es7000/ diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 9b30547..520cca0 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -11,12 +11,12 @@ #include #include #include -#include +#include #include -#include -#include -#include -#include +#include +#include +#include +#include static int probe_es7000(void) { diff --git a/include/asm-x86/es7000/apic.h b/include/asm-x86/es7000/apic.h new file mode 100644 index 0000000..bd2c44d --- /dev/null +++ b/include/asm-x86/es7000/apic.h @@ -0,0 +1,194 @@ +#ifndef __ASM_ES7000_APIC_H +#define __ASM_ES7000_APIC_H + +#define xapic_phys_to_log_apicid(cpu) per_cpu(x86_bios_cpu_apicid, cpu) +#define esr_disable (1) + +static inline int apic_id_registered(void) +{ + return (1); +} + +static inline cpumask_t target_cpus(void) +{ +#if defined CONFIG_ES7000_CLUSTERED_APIC + return CPU_MASK_ALL; +#else + return cpumask_of_cpu(smp_processor_id()); +#endif +} +#define TARGET_CPUS (target_cpus()) + +#if defined CONFIG_ES7000_CLUSTERED_APIC +#define APIC_DFR_VALUE (APIC_DFR_CLUSTER) +#define INT_DELIVERY_MODE (dest_LowestPrio) +#define INT_DEST_MODE (1) /* logical delivery broadcast to all procs */ +#define NO_BALANCE_IRQ (1) +#undef WAKE_SECONDARY_VIA_INIT +#define WAKE_SECONDARY_VIA_MIP +#else +#define APIC_DFR_VALUE (APIC_DFR_FLAT) +#define INT_DELIVERY_MODE (dest_Fixed) +#define INT_DEST_MODE (0) /* phys delivery to target procs */ +#define NO_BALANCE_IRQ (0) +#undef APIC_DEST_LOGICAL +#define APIC_DEST_LOGICAL 0x0 +#define WAKE_SECONDARY_VIA_INIT +#endif + +static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) +{ + return 0; +} +static inline unsigned long check_apicid_present(int bit) +{ + return physid_isset(bit, phys_cpu_present_map); +} + +#define apicid_cluster(apicid) (apicid & 0xF0) + +static inline unsigned long calculate_ldr(int cpu) +{ + unsigned long id; + id = xapic_phys_to_log_apicid(cpu); + return (SET_APIC_LOGICAL_ID(id)); +} + +/* + * Set up the logical destination ID. + * + * Intel recommends to set DFR, LdR and TPR before enabling + * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel + * document number 292116). So here it goes... + */ +static inline void init_apic_ldr(void) +{ + unsigned long val; + int cpu = smp_processor_id(); + + apic_write(APIC_DFR, APIC_DFR_VALUE); + val = calculate_ldr(cpu); + apic_write(APIC_LDR, val); +} + +#ifndef CONFIG_X86_GENERICARCH +extern void enable_apic_mode(void); +#endif + +extern int apic_version [MAX_APICS]; +static inline void setup_apic_routing(void) +{ + int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id()); + printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", + (apic_version[apic] == 0x14) ? + "Physical Cluster" : "Logical Cluster", nr_ioapics, cpus_addr(TARGET_CPUS)[0]); +} + +static inline int multi_timer_check(int apic, int irq) +{ + return 0; +} + +static inline int apicid_to_node(int logical_apicid) +{ + return 0; +} + + +static inline int cpu_present_to_apicid(int mps_cpu) +{ + if (!mps_cpu) + return boot_cpu_physical_apicid; + else if (mps_cpu < NR_CPUS) + return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); + else + return BAD_APICID; +} + +static inline physid_mask_t apicid_to_cpu_present(int phys_apicid) +{ + static int id = 0; + physid_mask_t mask; + mask = physid_mask_of_physid(id); + ++id; + return mask; +} + +extern u8 cpu_2_logical_apicid[]; +/* Mapping from cpu number to logical apicid */ +static inline int cpu_to_logical_apicid(int cpu) +{ +#ifdef CONFIG_SMP + if (cpu >= NR_CPUS) + return BAD_APICID; + return (int)cpu_2_logical_apicid[cpu]; +#else + return logical_smp_processor_id(); +#endif +} + +static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_map) +{ + /* For clustered we don't have a good way to do this yet - hack */ + return physids_promote(0xff); +} + + +static inline void setup_portio_remap(void) +{ +} + +extern unsigned int boot_cpu_physical_apicid; +static inline int check_phys_apicid_present(int cpu_physical_apicid) +{ + boot_cpu_physical_apicid = read_apic_id(); + return (1); +} + +static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) +{ + int num_bits_set; + int cpus_found = 0; + int cpu; + int apicid; + + num_bits_set = cpus_weight(cpumask); + /* Return id to all */ + if (num_bits_set == NR_CPUS) +#if defined CONFIG_ES7000_CLUSTERED_APIC + return 0xFF; +#else + return cpu_to_logical_apicid(0); +#endif + /* + * The cpus in the mask must all be on the apic cluster. If are not + * on the same apicid cluster return default value of TARGET_CPUS. + */ + cpu = first_cpu(cpumask); + apicid = cpu_to_logical_apicid(cpu); + while (cpus_found < num_bits_set) { + if (cpu_isset(cpu, cpumask)) { + int new_apicid = cpu_to_logical_apicid(cpu); + if (apicid_cluster(apicid) != + apicid_cluster(new_apicid)){ + printk ("%s: Not a valid mask!\n",__FUNCTION__); +#if defined CONFIG_ES7000_CLUSTERED_APIC + return 0xFF; +#else + return cpu_to_logical_apicid(0); +#endif + } + apicid = new_apicid; + cpus_found++; + } + cpu++; + } + return apicid; +} + +static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) +{ + return cpuid_apic >> index_msb; +} + +#endif /* __ASM_ES7000_APIC_H */ diff --git a/include/asm-x86/es7000/apicdef.h b/include/asm-x86/es7000/apicdef.h new file mode 100644 index 0000000..8b234a3 --- /dev/null +++ b/include/asm-x86/es7000/apicdef.h @@ -0,0 +1,13 @@ +#ifndef __ASM_ES7000_APICDEF_H +#define __ASM_ES7000_APICDEF_H + +#define APIC_ID_MASK (0xFF<<24) + +static inline unsigned get_apic_id(unsigned long x) +{ + return (((x)>>24)&0xFF); +} + +#define GET_APIC_ID(x) get_apic_id(x) + +#endif diff --git a/include/asm-x86/es7000/ipi.h b/include/asm-x86/es7000/ipi.h new file mode 100644 index 0000000..632a955 --- /dev/null +++ b/include/asm-x86/es7000/ipi.h @@ -0,0 +1,24 @@ +#ifndef __ASM_ES7000_IPI_H +#define __ASM_ES7000_IPI_H + +void send_IPI_mask_sequence(cpumask_t mask, int vector); + +static inline void send_IPI_mask(cpumask_t mask, int vector) +{ + send_IPI_mask_sequence(mask, vector); +} + +static inline void send_IPI_allbutself(int vector) +{ + cpumask_t mask = cpu_online_map; + cpu_clear(smp_processor_id(), mask); + if (!cpus_empty(mask)) + send_IPI_mask(mask, vector); +} + +static inline void send_IPI_all(int vector) +{ + send_IPI_mask(cpu_online_map, vector); +} + +#endif /* __ASM_ES7000_IPI_H */ diff --git a/include/asm-x86/es7000/mpparse.h b/include/asm-x86/es7000/mpparse.h new file mode 100644 index 0000000..7b5c889 --- /dev/null +++ b/include/asm-x86/es7000/mpparse.h @@ -0,0 +1,29 @@ +#ifndef __ASM_ES7000_MPPARSE_H +#define __ASM_ES7000_MPPARSE_H + +#include + +extern int parse_unisys_oem (char *oemptr); +extern int find_unisys_acpi_oem_table(unsigned long *oem_addr); +extern void setup_unisys(void); + +#ifndef CONFIG_X86_GENERICARCH +extern int acpi_madt_oem_check(char *oem_id, char *oem_table_id); +extern int mps_oem_check(struct mp_config_table *mpc, char *oem, + char *productid); +#endif + +#ifdef CONFIG_ACPI + +static inline int es7000_check_dsdt(void) +{ + struct acpi_table_header header; + + if (ACPI_SUCCESS(acpi_get_table_header(ACPI_SIG_DSDT, 0, &header)) && + !strncmp(header.oem_id, "UNISYS", 6)) + return 1; + return 0; +} +#endif + +#endif /* __ASM_MACH_MPPARSE_H */ diff --git a/include/asm-x86/es7000/wakecpu.h b/include/asm-x86/es7000/wakecpu.h new file mode 100644 index 0000000..3ffc5a7 --- /dev/null +++ b/include/asm-x86/es7000/wakecpu.h @@ -0,0 +1,59 @@ +#ifndef __ASM_ES7000_WAKECPU_H +#define __ASM_ES7000_WAKECPU_H + +/* + * This file copes with machines that wakeup secondary CPUs by the + * INIT, INIT, STARTUP sequence. + */ + +#ifdef CONFIG_ES7000_CLUSTERED_APIC +#define WAKE_SECONDARY_VIA_MIP +#else +#define WAKE_SECONDARY_VIA_INIT +#endif + +#ifdef WAKE_SECONDARY_VIA_MIP +extern int es7000_start_cpu(int cpu, unsigned long eip); +static inline int +wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) +{ + int boot_error = 0; + boot_error = es7000_start_cpu(phys_apicid, start_eip); + return boot_error; +} +#endif + +#define TRAMPOLINE_LOW phys_to_virt(0x467) +#define TRAMPOLINE_HIGH phys_to_virt(0x469) + +#define boot_cpu_apicid boot_cpu_physical_apicid + +static inline void wait_for_init_deassert(atomic_t *deassert) +{ +#ifdef WAKE_SECONDARY_VIA_INIT + while (!atomic_read(deassert)) + cpu_relax(); +#endif + return; +} + +/* Nothing to do for most platforms, since cleared by the INIT cycle */ +static inline void smp_callin_clear_local_apic(void) +{ +} + +static inline void store_NMI_vector(unsigned short *high, unsigned short *low) +{ +} + +static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) +{ +} + +#if APIC_DEBUG + #define inquire_remote_apic(apicid) __inquire_remote_apic(apicid) +#else + #define inquire_remote_apic(apicid) {} +#endif + +#endif /* __ASM_MACH_WAKECPU_H */ diff --git a/include/asm-x86/mach-es7000/mach_apic.h b/include/asm-x86/mach-es7000/mach_apic.h deleted file mode 100644 index ae877ec..0000000 --- a/include/asm-x86/mach-es7000/mach_apic.h +++ /dev/null @@ -1,194 +0,0 @@ -#ifndef ASM_X86__MACH_ES7000__MACH_APIC_H -#define ASM_X86__MACH_ES7000__MACH_APIC_H - -#define xapic_phys_to_log_apicid(cpu) per_cpu(x86_bios_cpu_apicid, cpu) -#define esr_disable (1) - -static inline int apic_id_registered(void) -{ - return (1); -} - -static inline cpumask_t target_cpus(void) -{ -#if defined CONFIG_ES7000_CLUSTERED_APIC - return CPU_MASK_ALL; -#else - return cpumask_of_cpu(smp_processor_id()); -#endif -} -#define TARGET_CPUS (target_cpus()) - -#if defined CONFIG_ES7000_CLUSTERED_APIC -#define APIC_DFR_VALUE (APIC_DFR_CLUSTER) -#define INT_DELIVERY_MODE (dest_LowestPrio) -#define INT_DEST_MODE (1) /* logical delivery broadcast to all procs */ -#define NO_BALANCE_IRQ (1) -#undef WAKE_SECONDARY_VIA_INIT -#define WAKE_SECONDARY_VIA_MIP -#else -#define APIC_DFR_VALUE (APIC_DFR_FLAT) -#define INT_DELIVERY_MODE (dest_Fixed) -#define INT_DEST_MODE (0) /* phys delivery to target procs */ -#define NO_BALANCE_IRQ (0) -#undef APIC_DEST_LOGICAL -#define APIC_DEST_LOGICAL 0x0 -#define WAKE_SECONDARY_VIA_INIT -#endif - -static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) -{ - return 0; -} -static inline unsigned long check_apicid_present(int bit) -{ - return physid_isset(bit, phys_cpu_present_map); -} - -#define apicid_cluster(apicid) (apicid & 0xF0) - -static inline unsigned long calculate_ldr(int cpu) -{ - unsigned long id; - id = xapic_phys_to_log_apicid(cpu); - return (SET_APIC_LOGICAL_ID(id)); -} - -/* - * Set up the logical destination ID. - * - * Intel recommends to set DFR, LdR and TPR before enabling - * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel - * document number 292116). So here it goes... - */ -static inline void init_apic_ldr(void) -{ - unsigned long val; - int cpu = smp_processor_id(); - - apic_write(APIC_DFR, APIC_DFR_VALUE); - val = calculate_ldr(cpu); - apic_write(APIC_LDR, val); -} - -#ifndef CONFIG_X86_GENERICARCH -extern void enable_apic_mode(void); -#endif - -extern int apic_version [MAX_APICS]; -static inline void setup_apic_routing(void) -{ - int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id()); - printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", - (apic_version[apic] == 0x14) ? - "Physical Cluster" : "Logical Cluster", nr_ioapics, cpus_addr(TARGET_CPUS)[0]); -} - -static inline int multi_timer_check(int apic, int irq) -{ - return 0; -} - -static inline int apicid_to_node(int logical_apicid) -{ - return 0; -} - - -static inline int cpu_present_to_apicid(int mps_cpu) -{ - if (!mps_cpu) - return boot_cpu_physical_apicid; - else if (mps_cpu < NR_CPUS) - return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); - else - return BAD_APICID; -} - -static inline physid_mask_t apicid_to_cpu_present(int phys_apicid) -{ - static int id = 0; - physid_mask_t mask; - mask = physid_mask_of_physid(id); - ++id; - return mask; -} - -extern u8 cpu_2_logical_apicid[]; -/* Mapping from cpu number to logical apicid */ -static inline int cpu_to_logical_apicid(int cpu) -{ -#ifdef CONFIG_SMP - if (cpu >= NR_CPUS) - return BAD_APICID; - return (int)cpu_2_logical_apicid[cpu]; -#else - return logical_smp_processor_id(); -#endif -} - -static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_map) -{ - /* For clustered we don't have a good way to do this yet - hack */ - return physids_promote(0xff); -} - - -static inline void setup_portio_remap(void) -{ -} - -extern unsigned int boot_cpu_physical_apicid; -static inline int check_phys_apicid_present(int cpu_physical_apicid) -{ - boot_cpu_physical_apicid = read_apic_id(); - return (1); -} - -static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) -{ - int num_bits_set; - int cpus_found = 0; - int cpu; - int apicid; - - num_bits_set = cpus_weight(cpumask); - /* Return id to all */ - if (num_bits_set == NR_CPUS) -#if defined CONFIG_ES7000_CLUSTERED_APIC - return 0xFF; -#else - return cpu_to_logical_apicid(0); -#endif - /* - * The cpus in the mask must all be on the apic cluster. If are not - * on the same apicid cluster return default value of TARGET_CPUS. - */ - cpu = first_cpu(cpumask); - apicid = cpu_to_logical_apicid(cpu); - while (cpus_found < num_bits_set) { - if (cpu_isset(cpu, cpumask)) { - int new_apicid = cpu_to_logical_apicid(cpu); - if (apicid_cluster(apicid) != - apicid_cluster(new_apicid)){ - printk ("%s: Not a valid mask!\n",__FUNCTION__); -#if defined CONFIG_ES7000_CLUSTERED_APIC - return 0xFF; -#else - return cpu_to_logical_apicid(0); -#endif - } - apicid = new_apicid; - cpus_found++; - } - cpu++; - } - return apicid; -} - -static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) -{ - return cpuid_apic >> index_msb; -} - -#endif /* ASM_X86__MACH_ES7000__MACH_APIC_H */ diff --git a/include/asm-x86/mach-es7000/mach_apicdef.h b/include/asm-x86/mach-es7000/mach_apicdef.h deleted file mode 100644 index a07e567..0000000 --- a/include/asm-x86/mach-es7000/mach_apicdef.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef ASM_X86__MACH_ES7000__MACH_APICDEF_H -#define ASM_X86__MACH_ES7000__MACH_APICDEF_H - -#define APIC_ID_MASK (0xFF<<24) - -static inline unsigned get_apic_id(unsigned long x) -{ - return (((x)>>24)&0xFF); -} - -#define GET_APIC_ID(x) get_apic_id(x) - -#endif /* ASM_X86__MACH_ES7000__MACH_APICDEF_H */ diff --git a/include/asm-x86/mach-es7000/mach_ipi.h b/include/asm-x86/mach-es7000/mach_ipi.h deleted file mode 100644 index 3a21240..0000000 --- a/include/asm-x86/mach-es7000/mach_ipi.h +++ /dev/null @@ -1,24 +0,0 @@ -#ifndef ASM_X86__MACH_ES7000__MACH_IPI_H -#define ASM_X86__MACH_ES7000__MACH_IPI_H - -void send_IPI_mask_sequence(cpumask_t mask, int vector); - -static inline void send_IPI_mask(cpumask_t mask, int vector) -{ - send_IPI_mask_sequence(mask, vector); -} - -static inline void send_IPI_allbutself(int vector) -{ - cpumask_t mask = cpu_online_map; - cpu_clear(smp_processor_id(), mask); - if (!cpus_empty(mask)) - send_IPI_mask(mask, vector); -} - -static inline void send_IPI_all(int vector) -{ - send_IPI_mask(cpu_online_map, vector); -} - -#endif /* ASM_X86__MACH_ES7000__MACH_IPI_H */ diff --git a/include/asm-x86/mach-es7000/mach_mpparse.h b/include/asm-x86/mach-es7000/mach_mpparse.h deleted file mode 100644 index befde24..0000000 --- a/include/asm-x86/mach-es7000/mach_mpparse.h +++ /dev/null @@ -1,29 +0,0 @@ -#ifndef ASM_X86__MACH_ES7000__MACH_MPPARSE_H -#define ASM_X86__MACH_ES7000__MACH_MPPARSE_H - -#include - -extern int parse_unisys_oem (char *oemptr); -extern int find_unisys_acpi_oem_table(unsigned long *oem_addr); -extern void setup_unisys(void); - -#ifndef CONFIG_X86_GENERICARCH -extern int acpi_madt_oem_check(char *oem_id, char *oem_table_id); -extern int mps_oem_check(struct mp_config_table *mpc, char *oem, - char *productid); -#endif - -#ifdef CONFIG_ACPI - -static inline int es7000_check_dsdt(void) -{ - struct acpi_table_header header; - - if (ACPI_SUCCESS(acpi_get_table_header(ACPI_SIG_DSDT, 0, &header)) && - !strncmp(header.oem_id, "UNISYS", 6)) - return 1; - return 0; -} -#endif - -#endif /* ASM_X86__MACH_ES7000__MACH_MPPARSE_H */ diff --git a/include/asm-x86/mach-es7000/mach_wakecpu.h b/include/asm-x86/mach-es7000/mach_wakecpu.h deleted file mode 100644 index 97c776c..0000000 --- a/include/asm-x86/mach-es7000/mach_wakecpu.h +++ /dev/null @@ -1,59 +0,0 @@ -#ifndef ASM_X86__MACH_ES7000__MACH_WAKECPU_H -#define ASM_X86__MACH_ES7000__MACH_WAKECPU_H - -/* - * This file copes with machines that wakeup secondary CPUs by the - * INIT, INIT, STARTUP sequence. - */ - -#ifdef CONFIG_ES7000_CLUSTERED_APIC -#define WAKE_SECONDARY_VIA_MIP -#else -#define WAKE_SECONDARY_VIA_INIT -#endif - -#ifdef WAKE_SECONDARY_VIA_MIP -extern int es7000_start_cpu(int cpu, unsigned long eip); -static inline int -wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) -{ - int boot_error = 0; - boot_error = es7000_start_cpu(phys_apicid, start_eip); - return boot_error; -} -#endif - -#define TRAMPOLINE_LOW phys_to_virt(0x467) -#define TRAMPOLINE_HIGH phys_to_virt(0x469) - -#define boot_cpu_apicid boot_cpu_physical_apicid - -static inline void wait_for_init_deassert(atomic_t *deassert) -{ -#ifdef WAKE_SECONDARY_VIA_INIT - while (!atomic_read(deassert)) - cpu_relax(); -#endif - return; -} - -/* Nothing to do for most platforms, since cleared by the INIT cycle */ -static inline void smp_callin_clear_local_apic(void) -{ -} - -static inline void store_NMI_vector(unsigned short *high, unsigned short *low) -{ -} - -static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) -{ -} - -#if APIC_DEBUG - #define inquire_remote_apic(apicid) __inquire_remote_apic(apicid) -#else - #define inquire_remote_apic(apicid) {} -#endif - -#endif /* ASM_X86__MACH_ES7000__MACH_WAKECPU_H */ -- cgit v0.10.2 From e8c48efdb971cfb1b043076cf68be535d461a20b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 25 Jul 2008 02:17:44 -0700 Subject: x86: mach_summit to summit Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c index d67ce5f..7b98785 100644 --- a/arch/x86/kernel/summit_32.c +++ b/arch/x86/kernel/summit_32.c @@ -30,7 +30,7 @@ #include #include #include -#include +#include static struct rio_table_hdr *rio_table_hdr __initdata; static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata; diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index 752edd9..6ad6b67 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -11,11 +11,11 @@ #include #include #include -#include +#include #include -#include -#include -#include +#include +#include +#include static int probe_summit(void) { diff --git a/include/asm-x86/mach-summit/irq_vectors_limits.h b/include/asm-x86/mach-summit/irq_vectors_limits.h deleted file mode 100644 index 22f376a..0000000 --- a/include/asm-x86/mach-summit/irq_vectors_limits.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef ASM_X86__MACH_SUMMIT__IRQ_VECTORS_LIMITS_H -#define ASM_X86__MACH_SUMMIT__IRQ_VECTORS_LIMITS_H - -/* - * For Summit or generic (i.e. installer) kernels, we have lots of I/O APICs, - * even with uni-proc kernels, so use a big array. - * - * This value should be the same in both the generic and summit subarches. - * Change one, change 'em both. - */ -#define NR_IRQS 224 -#define NR_IRQ_VECTORS 1024 - -#endif /* ASM_X86__MACH_SUMMIT__IRQ_VECTORS_LIMITS_H */ diff --git a/include/asm-x86/mach-summit/mach_apic.h b/include/asm-x86/mach-summit/mach_apic.h deleted file mode 100644 index 7a66758..0000000 --- a/include/asm-x86/mach-summit/mach_apic.h +++ /dev/null @@ -1,185 +0,0 @@ -#ifndef ASM_X86__MACH_SUMMIT__MACH_APIC_H -#define ASM_X86__MACH_SUMMIT__MACH_APIC_H - -#include - -#define esr_disable (1) -#define NO_BALANCE_IRQ (0) - -/* In clustered mode, the high nibble of APIC ID is a cluster number. - * The low nibble is a 4-bit bitmap. */ -#define XAPIC_DEST_CPUS_SHIFT 4 -#define XAPIC_DEST_CPUS_MASK ((1u << XAPIC_DEST_CPUS_SHIFT) - 1) -#define XAPIC_DEST_CLUSTER_MASK (XAPIC_DEST_CPUS_MASK << XAPIC_DEST_CPUS_SHIFT) - -#define APIC_DFR_VALUE (APIC_DFR_CLUSTER) - -static inline cpumask_t target_cpus(void) -{ - /* CPU_MASK_ALL (0xff) has undefined behaviour with - * dest_LowestPrio mode logical clustered apic interrupt routing - * Just start on cpu 0. IRQ balancing will spread load - */ - return cpumask_of_cpu(0); -} -#define TARGET_CPUS (target_cpus()) - -#define INT_DELIVERY_MODE (dest_LowestPrio) -#define INT_DEST_MODE 1 /* logical delivery broadcast to all procs */ - -static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) -{ - return 0; -} - -/* we don't use the phys_cpu_present_map to indicate apicid presence */ -static inline unsigned long check_apicid_present(int bit) -{ - return 1; -} - -#define apicid_cluster(apicid) ((apicid) & XAPIC_DEST_CLUSTER_MASK) - -extern u8 cpu_2_logical_apicid[]; - -static inline void init_apic_ldr(void) -{ - unsigned long val, id; - int count = 0; - u8 my_id = (u8)hard_smp_processor_id(); - u8 my_cluster = (u8)apicid_cluster(my_id); -#ifdef CONFIG_SMP - u8 lid; - int i; - - /* Create logical APIC IDs by counting CPUs already in cluster. */ - for (count = 0, i = NR_CPUS; --i >= 0; ) { - lid = cpu_2_logical_apicid[i]; - if (lid != BAD_APICID && apicid_cluster(lid) == my_cluster) - ++count; - } -#endif - /* We only have a 4 wide bitmap in cluster mode. If a deranged - * BIOS puts 5 CPUs in one APIC cluster, we're hosed. */ - BUG_ON(count >= XAPIC_DEST_CPUS_SHIFT); - id = my_cluster | (1UL << count); - apic_write(APIC_DFR, APIC_DFR_VALUE); - val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; - val |= SET_APIC_LOGICAL_ID(id); - apic_write(APIC_LDR, val); -} - -static inline int multi_timer_check(int apic, int irq) -{ - return 0; -} - -static inline int apic_id_registered(void) -{ - return 1; -} - -static inline void setup_apic_routing(void) -{ - printk("Enabling APIC mode: Summit. Using %d I/O APICs\n", - nr_ioapics); -} - -static inline int apicid_to_node(int logical_apicid) -{ -#ifdef CONFIG_SMP - return apicid_2_node[hard_smp_processor_id()]; -#else - return 0; -#endif -} - -/* Mapping from cpu number to logical apicid */ -static inline int cpu_to_logical_apicid(int cpu) -{ -#ifdef CONFIG_SMP - if (cpu >= NR_CPUS) - return BAD_APICID; - return (int)cpu_2_logical_apicid[cpu]; -#else - return logical_smp_processor_id(); -#endif -} - -static inline int cpu_present_to_apicid(int mps_cpu) -{ - if (mps_cpu < NR_CPUS) - return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); - else - return BAD_APICID; -} - -static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_id_map) -{ - /* For clustered we don't have a good way to do this yet - hack */ - return physids_promote(0x0F); -} - -static inline physid_mask_t apicid_to_cpu_present(int apicid) -{ - return physid_mask_of_physid(apicid); -} - -static inline void setup_portio_remap(void) -{ -} - -static inline int check_phys_apicid_present(int boot_cpu_physical_apicid) -{ - return 1; -} - -static inline void enable_apic_mode(void) -{ -} - -static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) -{ - int num_bits_set; - int cpus_found = 0; - int cpu; - int apicid; - - num_bits_set = cpus_weight(cpumask); - /* Return id to all */ - if (num_bits_set == NR_CPUS) - return (int) 0xFF; - /* - * The cpus in the mask must all be on the apic cluster. If are not - * on the same apicid cluster return default value of TARGET_CPUS. - */ - cpu = first_cpu(cpumask); - apicid = cpu_to_logical_apicid(cpu); - while (cpus_found < num_bits_set) { - if (cpu_isset(cpu, cpumask)) { - int new_apicid = cpu_to_logical_apicid(cpu); - if (apicid_cluster(apicid) != - apicid_cluster(new_apicid)){ - printk ("%s: Not a valid mask!\n",__FUNCTION__); - return 0xFF; - } - apicid = apicid | new_apicid; - cpus_found++; - } - cpu++; - } - return apicid; -} - -/* cpuid returns the value latched in the HW at reset, not the APIC ID - * register's value. For any box whose BIOS changes APIC IDs, like - * clustered APIC systems, we must use hard_smp_processor_id. - * - * See Intel's IA-32 SW Dev's Manual Vol2 under CPUID. - */ -static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) -{ - return hard_smp_processor_id() >> index_msb; -} - -#endif /* ASM_X86__MACH_SUMMIT__MACH_APIC_H */ diff --git a/include/asm-x86/mach-summit/mach_apicdef.h b/include/asm-x86/mach-summit/mach_apicdef.h deleted file mode 100644 index d4bc859..0000000 --- a/include/asm-x86/mach-summit/mach_apicdef.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef ASM_X86__MACH_SUMMIT__MACH_APICDEF_H -#define ASM_X86__MACH_SUMMIT__MACH_APICDEF_H - -#define APIC_ID_MASK (0xFF<<24) - -static inline unsigned get_apic_id(unsigned long x) -{ - return (((x)>>24)&0xFF); -} - -#define GET_APIC_ID(x) get_apic_id(x) - -#endif /* ASM_X86__MACH_SUMMIT__MACH_APICDEF_H */ diff --git a/include/asm-x86/mach-summit/mach_ipi.h b/include/asm-x86/mach-summit/mach_ipi.h deleted file mode 100644 index a3b31c5..0000000 --- a/include/asm-x86/mach-summit/mach_ipi.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef ASM_X86__MACH_SUMMIT__MACH_IPI_H -#define ASM_X86__MACH_SUMMIT__MACH_IPI_H - -void send_IPI_mask_sequence(cpumask_t mask, int vector); - -static inline void send_IPI_mask(cpumask_t mask, int vector) -{ - send_IPI_mask_sequence(mask, vector); -} - -static inline void send_IPI_allbutself(int vector) -{ - cpumask_t mask = cpu_online_map; - cpu_clear(smp_processor_id(), mask); - - if (!cpus_empty(mask)) - send_IPI_mask(mask, vector); -} - -static inline void send_IPI_all(int vector) -{ - send_IPI_mask(cpu_online_map, vector); -} - -#endif /* ASM_X86__MACH_SUMMIT__MACH_IPI_H */ diff --git a/include/asm-x86/mach-summit/mach_mpparse.h b/include/asm-x86/mach-summit/mach_mpparse.h deleted file mode 100644 index 92396f2..0000000 --- a/include/asm-x86/mach-summit/mach_mpparse.h +++ /dev/null @@ -1,110 +0,0 @@ -#ifndef ASM_X86__MACH_SUMMIT__MACH_MPPARSE_H -#define ASM_X86__MACH_SUMMIT__MACH_MPPARSE_H - -#include -#include - -extern int use_cyclone; - -#ifdef CONFIG_X86_SUMMIT_NUMA -extern void setup_summit(void); -#else -#define setup_summit() {} -#endif - -static inline int mps_oem_check(struct mp_config_table *mpc, char *oem, - char *productid) -{ - if (!strncmp(oem, "IBM ENSW", 8) && - (!strncmp(productid, "VIGIL SMP", 9) - || !strncmp(productid, "EXA", 3) - || !strncmp(productid, "RUTHLESS SMP", 12))){ - mark_tsc_unstable("Summit based system"); - use_cyclone = 1; /*enable cyclone-timer*/ - setup_summit(); - return 1; - } - return 0; -} - -/* Hook from generic ACPI tables.c */ -static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ - if (!strncmp(oem_id, "IBM", 3) && - (!strncmp(oem_table_id, "SERVIGIL", 8) - || !strncmp(oem_table_id, "EXA", 3))){ - mark_tsc_unstable("Summit based system"); - use_cyclone = 1; /*enable cyclone-timer*/ - setup_summit(); - return 1; - } - return 0; -} - -struct rio_table_hdr { - unsigned char version; /* Version number of this data structure */ - /* Version 3 adds chassis_num & WP_index */ - unsigned char num_scal_dev; /* # of Scalability devices (Twisters for Vigil) */ - unsigned char num_rio_dev; /* # of RIO I/O devices (Cyclones and Winnipegs) */ -} __attribute__((packed)); - -struct scal_detail { - unsigned char node_id; /* Scalability Node ID */ - unsigned long CBAR; /* Address of 1MB register space */ - unsigned char port0node; /* Node ID port connected to: 0xFF=None */ - unsigned char port0port; /* Port num port connected to: 0,1,2, or 0xFF=None */ - unsigned char port1node; /* Node ID port connected to: 0xFF = None */ - unsigned char port1port; /* Port num port connected to: 0,1,2, or 0xFF=None */ - unsigned char port2node; /* Node ID port connected to: 0xFF = None */ - unsigned char port2port; /* Port num port connected to: 0,1,2, or 0xFF=None */ - unsigned char chassis_num; /* 1 based Chassis number (1 = boot node) */ -} __attribute__((packed)); - -struct rio_detail { - unsigned char node_id; /* RIO Node ID */ - unsigned long BBAR; /* Address of 1MB register space */ - unsigned char type; /* Type of device */ - unsigned char owner_id; /* For WPEG: Node ID of Cyclone that owns this WPEG*/ - /* For CYC: Node ID of Twister that owns this CYC */ - unsigned char port0node; /* Node ID port connected to: 0xFF=None */ - unsigned char port0port; /* Port num port connected to: 0,1,2, or 0xFF=None */ - unsigned char port1node; /* Node ID port connected to: 0xFF=None */ - unsigned char port1port; /* Port num port connected to: 0,1,2, or 0xFF=None */ - unsigned char first_slot; /* For WPEG: Lowest slot number below this WPEG */ - /* For CYC: 0 */ - unsigned char status; /* For WPEG: Bit 0 = 1 : the XAPIC is used */ - /* = 0 : the XAPIC is not used, ie:*/ - /* ints fwded to another XAPIC */ - /* Bits1:7 Reserved */ - /* For CYC: Bits0:7 Reserved */ - unsigned char WP_index; /* For WPEG: WPEG instance index - lower ones have */ - /* lower slot numbers/PCI bus numbers */ - /* For CYC: No meaning */ - unsigned char chassis_num; /* 1 based Chassis number */ - /* For LookOut WPEGs this field indicates the */ - /* Expansion Chassis #, enumerated from Boot */ - /* Node WPEG external port, then Boot Node CYC */ - /* external port, then Next Vigil chassis WPEG */ - /* external port, etc. */ - /* Shared Lookouts have only 1 chassis number (the */ - /* first one assigned) */ -} __attribute__((packed)); - - -typedef enum { - CompatTwister = 0, /* Compatibility Twister */ - AltTwister = 1, /* Alternate Twister of internal 8-way */ - CompatCyclone = 2, /* Compatibility Cyclone */ - AltCyclone = 3, /* Alternate Cyclone of internal 8-way */ - CompatWPEG = 4, /* Compatibility WPEG */ - AltWPEG = 5, /* Second Planar WPEG */ - LookOutAWPEG = 6, /* LookOut WPEG */ - LookOutBWPEG = 7, /* LookOut WPEG */ -} node_type; - -static inline int is_WPEG(struct rio_detail *rio){ - return (rio->type == CompatWPEG || rio->type == AltWPEG || - rio->type == LookOutAWPEG || rio->type == LookOutBWPEG); -} - -#endif /* ASM_X86__MACH_SUMMIT__MACH_MPPARSE_H */ diff --git a/include/asm-x86/summit/apic.h b/include/asm-x86/summit/apic.h new file mode 100644 index 0000000..c5b2e4b --- /dev/null +++ b/include/asm-x86/summit/apic.h @@ -0,0 +1,185 @@ +#ifndef __ASM_SUMMIT_APIC_H +#define __ASM_SUMMIT_APIC_H + +#include + +#define esr_disable (1) +#define NO_BALANCE_IRQ (0) + +/* In clustered mode, the high nibble of APIC ID is a cluster number. + * The low nibble is a 4-bit bitmap. */ +#define XAPIC_DEST_CPUS_SHIFT 4 +#define XAPIC_DEST_CPUS_MASK ((1u << XAPIC_DEST_CPUS_SHIFT) - 1) +#define XAPIC_DEST_CLUSTER_MASK (XAPIC_DEST_CPUS_MASK << XAPIC_DEST_CPUS_SHIFT) + +#define APIC_DFR_VALUE (APIC_DFR_CLUSTER) + +static inline cpumask_t target_cpus(void) +{ + /* CPU_MASK_ALL (0xff) has undefined behaviour with + * dest_LowestPrio mode logical clustered apic interrupt routing + * Just start on cpu 0. IRQ balancing will spread load + */ + return cpumask_of_cpu(0); +} +#define TARGET_CPUS (target_cpus()) + +#define INT_DELIVERY_MODE (dest_LowestPrio) +#define INT_DEST_MODE 1 /* logical delivery broadcast to all procs */ + +static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) +{ + return 0; +} + +/* we don't use the phys_cpu_present_map to indicate apicid presence */ +static inline unsigned long check_apicid_present(int bit) +{ + return 1; +} + +#define apicid_cluster(apicid) ((apicid) & XAPIC_DEST_CLUSTER_MASK) + +extern u8 cpu_2_logical_apicid[]; + +static inline void init_apic_ldr(void) +{ + unsigned long val, id; + int count = 0; + u8 my_id = (u8)hard_smp_processor_id(); + u8 my_cluster = (u8)apicid_cluster(my_id); +#ifdef CONFIG_SMP + u8 lid; + int i; + + /* Create logical APIC IDs by counting CPUs already in cluster. */ + for (count = 0, i = NR_CPUS; --i >= 0; ) { + lid = cpu_2_logical_apicid[i]; + if (lid != BAD_APICID && apicid_cluster(lid) == my_cluster) + ++count; + } +#endif + /* We only have a 4 wide bitmap in cluster mode. If a deranged + * BIOS puts 5 CPUs in one APIC cluster, we're hosed. */ + BUG_ON(count >= XAPIC_DEST_CPUS_SHIFT); + id = my_cluster | (1UL << count); + apic_write(APIC_DFR, APIC_DFR_VALUE); + val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; + val |= SET_APIC_LOGICAL_ID(id); + apic_write(APIC_LDR, val); +} + +static inline int multi_timer_check(int apic, int irq) +{ + return 0; +} + +static inline int apic_id_registered(void) +{ + return 1; +} + +static inline void setup_apic_routing(void) +{ + printk("Enabling APIC mode: Summit. Using %d I/O APICs\n", + nr_ioapics); +} + +static inline int apicid_to_node(int logical_apicid) +{ +#ifdef CONFIG_SMP + return apicid_2_node[hard_smp_processor_id()]; +#else + return 0; +#endif +} + +/* Mapping from cpu number to logical apicid */ +static inline int cpu_to_logical_apicid(int cpu) +{ +#ifdef CONFIG_SMP + if (cpu >= NR_CPUS) + return BAD_APICID; + return (int)cpu_2_logical_apicid[cpu]; +#else + return logical_smp_processor_id(); +#endif +} + +static inline int cpu_present_to_apicid(int mps_cpu) +{ + if (mps_cpu < NR_CPUS) + return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); + else + return BAD_APICID; +} + +static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_id_map) +{ + /* For clustered we don't have a good way to do this yet - hack */ + return physids_promote(0x0F); +} + +static inline physid_mask_t apicid_to_cpu_present(int apicid) +{ + return physid_mask_of_physid(0); +} + +static inline void setup_portio_remap(void) +{ +} + +static inline int check_phys_apicid_present(int boot_cpu_physical_apicid) +{ + return 1; +} + +static inline void enable_apic_mode(void) +{ +} + +static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) +{ + int num_bits_set; + int cpus_found = 0; + int cpu; + int apicid; + + num_bits_set = cpus_weight(cpumask); + /* Return id to all */ + if (num_bits_set == NR_CPUS) + return (int) 0xFF; + /* + * The cpus in the mask must all be on the apic cluster. If are not + * on the same apicid cluster return default value of TARGET_CPUS. + */ + cpu = first_cpu(cpumask); + apicid = cpu_to_logical_apicid(cpu); + while (cpus_found < num_bits_set) { + if (cpu_isset(cpu, cpumask)) { + int new_apicid = cpu_to_logical_apicid(cpu); + if (apicid_cluster(apicid) != + apicid_cluster(new_apicid)){ + printk ("%s: Not a valid mask!\n",__FUNCTION__); + return 0xFF; + } + apicid = apicid | new_apicid; + cpus_found++; + } + cpu++; + } + return apicid; +} + +/* cpuid returns the value latched in the HW at reset, not the APIC ID + * register's value. For any box whose BIOS changes APIC IDs, like + * clustered APIC systems, we must use hard_smp_processor_id. + * + * See Intel's IA-32 SW Dev's Manual Vol2 under CPUID. + */ +static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) +{ + return hard_smp_processor_id() >> index_msb; +} + +#endif /* __ASM_SUMMIT_APIC_H */ diff --git a/include/asm-x86/summit/apicdef.h b/include/asm-x86/summit/apicdef.h new file mode 100644 index 0000000..f3fbca1 --- /dev/null +++ b/include/asm-x86/summit/apicdef.h @@ -0,0 +1,13 @@ +#ifndef __ASM_SUMMIT_APICDEF_H +#define __ASM_SUMMIT_APICDEF_H + +#define APIC_ID_MASK (0xFF<<24) + +static inline unsigned get_apic_id(unsigned long x) +{ + return (x>>24)&0xFF; +} + +#define GET_APIC_ID(x) get_apic_id(x) + +#endif diff --git a/include/asm-x86/summit/ipi.h b/include/asm-x86/summit/ipi.h new file mode 100644 index 0000000..53bd1e7 --- /dev/null +++ b/include/asm-x86/summit/ipi.h @@ -0,0 +1,25 @@ +#ifndef __ASM_SUMMIT_IPI_H +#define __ASM_SUMMIT_IPI_H + +void send_IPI_mask_sequence(cpumask_t mask, int vector); + +static inline void send_IPI_mask(cpumask_t mask, int vector) +{ + send_IPI_mask_sequence(mask, vector); +} + +static inline void send_IPI_allbutself(int vector) +{ + cpumask_t mask = cpu_online_map; + cpu_clear(smp_processor_id(), mask); + + if (!cpus_empty(mask)) + send_IPI_mask(mask, vector); +} + +static inline void send_IPI_all(int vector) +{ + send_IPI_mask(cpu_online_map, vector); +} + +#endif /* __ASM_SUMMIT_IPI_H */ diff --git a/include/asm-x86/summit/irq_vectors_limits.h b/include/asm-x86/summit/irq_vectors_limits.h new file mode 100644 index 0000000..890ce3f --- /dev/null +++ b/include/asm-x86/summit/irq_vectors_limits.h @@ -0,0 +1,14 @@ +#ifndef _ASM_IRQ_VECTORS_LIMITS_H +#define _ASM_IRQ_VECTORS_LIMITS_H + +/* + * For Summit or generic (i.e. installer) kernels, we have lots of I/O APICs, + * even with uni-proc kernels, so use a big array. + * + * This value should be the same in both the generic and summit subarches. + * Change one, change 'em both. + */ +#define NR_IRQS 224 +#define NR_IRQ_VECTORS 1024 + +#endif /* _ASM_IRQ_VECTORS_LIMITS_H */ diff --git a/include/asm-x86/summit/mpparse.h b/include/asm-x86/summit/mpparse.h new file mode 100644 index 0000000..013ce6f --- /dev/null +++ b/include/asm-x86/summit/mpparse.h @@ -0,0 +1,109 @@ +#ifndef __ASM_SUMMIT_MPPARSE_H +#define __ASM_SUMMIT_MPPARSE_H + +#include + +extern int use_cyclone; + +#ifdef CONFIG_X86_SUMMIT_NUMA +extern void setup_summit(void); +#else +#define setup_summit() {} +#endif + +static inline int mps_oem_check(struct mp_config_table *mpc, char *oem, + char *productid) +{ + if (!strncmp(oem, "IBM ENSW", 8) && + (!strncmp(productid, "VIGIL SMP", 9) + || !strncmp(productid, "EXA", 3) + || !strncmp(productid, "RUTHLESS SMP", 12))){ + mark_tsc_unstable("Summit based system"); + use_cyclone = 1; /*enable cyclone-timer*/ + setup_summit(); + return 1; + } + return 0; +} + +/* Hook from generic ACPI tables.c */ +static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + if (!strncmp(oem_id, "IBM", 3) && + (!strncmp(oem_table_id, "SERVIGIL", 8) + || !strncmp(oem_table_id, "EXA", 3))){ + mark_tsc_unstable("Summit based system"); + use_cyclone = 1; /*enable cyclone-timer*/ + setup_summit(); + return 1; + } + return 0; +} + +struct rio_table_hdr { + unsigned char version; /* Version number of this data structure */ + /* Version 3 adds chassis_num & WP_index */ + unsigned char num_scal_dev; /* # of Scalability devices (Twisters for Vigil) */ + unsigned char num_rio_dev; /* # of RIO I/O devices (Cyclones and Winnipegs) */ +} __attribute__((packed)); + +struct scal_detail { + unsigned char node_id; /* Scalability Node ID */ + unsigned long CBAR; /* Address of 1MB register space */ + unsigned char port0node; /* Node ID port connected to: 0xFF=None */ + unsigned char port0port; /* Port num port connected to: 0,1,2, or 0xFF=None */ + unsigned char port1node; /* Node ID port connected to: 0xFF = None */ + unsigned char port1port; /* Port num port connected to: 0,1,2, or 0xFF=None */ + unsigned char port2node; /* Node ID port connected to: 0xFF = None */ + unsigned char port2port; /* Port num port connected to: 0,1,2, or 0xFF=None */ + unsigned char chassis_num; /* 1 based Chassis number (1 = boot node) */ +} __attribute__((packed)); + +struct rio_detail { + unsigned char node_id; /* RIO Node ID */ + unsigned long BBAR; /* Address of 1MB register space */ + unsigned char type; /* Type of device */ + unsigned char owner_id; /* For WPEG: Node ID of Cyclone that owns this WPEG*/ + /* For CYC: Node ID of Twister that owns this CYC */ + unsigned char port0node; /* Node ID port connected to: 0xFF=None */ + unsigned char port0port; /* Port num port connected to: 0,1,2, or 0xFF=None */ + unsigned char port1node; /* Node ID port connected to: 0xFF=None */ + unsigned char port1port; /* Port num port connected to: 0,1,2, or 0xFF=None */ + unsigned char first_slot; /* For WPEG: Lowest slot number below this WPEG */ + /* For CYC: 0 */ + unsigned char status; /* For WPEG: Bit 0 = 1 : the XAPIC is used */ + /* = 0 : the XAPIC is not used, ie:*/ + /* ints fwded to another XAPIC */ + /* Bits1:7 Reserved */ + /* For CYC: Bits0:7 Reserved */ + unsigned char WP_index; /* For WPEG: WPEG instance index - lower ones have */ + /* lower slot numbers/PCI bus numbers */ + /* For CYC: No meaning */ + unsigned char chassis_num; /* 1 based Chassis number */ + /* For LookOut WPEGs this field indicates the */ + /* Expansion Chassis #, enumerated from Boot */ + /* Node WPEG external port, then Boot Node CYC */ + /* external port, then Next Vigil chassis WPEG */ + /* external port, etc. */ + /* Shared Lookouts have only 1 chassis number (the */ + /* first one assigned) */ +} __attribute__((packed)); + + +typedef enum { + CompatTwister = 0, /* Compatibility Twister */ + AltTwister = 1, /* Alternate Twister of internal 8-way */ + CompatCyclone = 2, /* Compatibility Cyclone */ + AltCyclone = 3, /* Alternate Cyclone of internal 8-way */ + CompatWPEG = 4, /* Compatibility WPEG */ + AltWPEG = 5, /* Second Planar WPEG */ + LookOutAWPEG = 6, /* LookOut WPEG */ + LookOutBWPEG = 7, /* LookOut WPEG */ +} node_type; + +static inline int is_WPEG(struct rio_detail *rio){ + return (rio->type == CompatWPEG || rio->type == AltWPEG || + rio->type == LookOutAWPEG || rio->type == LookOutBWPEG); +} + +#endif /* __ASM_SUMMIT_MPPARSE_H */ -- cgit v0.10.2 From edb181ac4b0c7c1240503da46349a3fb69af9ef6 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 25 Jul 2008 02:17:55 -0700 Subject: x86: mach-numaq to numaq Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 95c07ef..8cf5839 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -11,12 +11,12 @@ #include #include #include -#include +#include #include -#include -#include -#include -#include +#include +#include +#include +#include #include static int mps_oem_check(struct mp_config_table *mpc, char *oem, diff --git a/include/asm-x86/mach-numaq/mach_apic.h b/include/asm-x86/mach-numaq/mach_apic.h deleted file mode 100644 index 7a0d39e..0000000 --- a/include/asm-x86/mach-numaq/mach_apic.h +++ /dev/null @@ -1,138 +0,0 @@ -#ifndef ASM_X86__MACH_NUMAQ__MACH_APIC_H -#define ASM_X86__MACH_NUMAQ__MACH_APIC_H - -#include -#include -#include - -#define APIC_DFR_VALUE (APIC_DFR_CLUSTER) - -static inline cpumask_t target_cpus(void) -{ - return CPU_MASK_ALL; -} - -#define TARGET_CPUS (target_cpus()) - -#define NO_BALANCE_IRQ (1) -#define esr_disable (1) - -#define INT_DELIVERY_MODE dest_LowestPrio -#define INT_DEST_MODE 0 /* physical delivery on LOCAL quad */ - -static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) -{ - return physid_isset(apicid, bitmap); -} -static inline unsigned long check_apicid_present(int bit) -{ - return physid_isset(bit, phys_cpu_present_map); -} -#define apicid_cluster(apicid) (apicid & 0xF0) - -static inline int apic_id_registered(void) -{ - return 1; -} - -static inline void init_apic_ldr(void) -{ - /* Already done in NUMA-Q firmware */ -} - -static inline void setup_apic_routing(void) -{ - printk("Enabling APIC mode: %s. Using %d I/O APICs\n", - "NUMA-Q", nr_ioapics); -} - -/* - * Skip adding the timer int on secondary nodes, which causes - * a small but painful rift in the time-space continuum. - */ -static inline int multi_timer_check(int apic, int irq) -{ - return apic != 0 && irq == 0; -} - -static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_map) -{ - /* We don't have a good way to do this yet - hack */ - return physids_promote(0xFUL); -} - -/* Mapping from cpu number to logical apicid */ -extern u8 cpu_2_logical_apicid[]; -static inline int cpu_to_logical_apicid(int cpu) -{ - if (cpu >= NR_CPUS) - return BAD_APICID; - return (int)cpu_2_logical_apicid[cpu]; -} - -/* - * Supporting over 60 cpus on NUMA-Q requires a locality-dependent - * cpu to APIC ID relation to properly interact with the intelligent - * mode of the cluster controller. - */ -static inline int cpu_present_to_apicid(int mps_cpu) -{ - if (mps_cpu < 60) - return ((mps_cpu >> 2) << 4) | (1 << (mps_cpu & 0x3)); - else - return BAD_APICID; -} - -static inline int apicid_to_node(int logical_apicid) -{ - return logical_apicid >> 4; -} - -static inline physid_mask_t apicid_to_cpu_present(int logical_apicid) -{ - int node = apicid_to_node(logical_apicid); - int cpu = __ffs(logical_apicid & 0xf); - - return physid_mask_of_physid(cpu + 4*node); -} - -extern void *xquad_portio; - -static inline void setup_portio_remap(void) -{ - int num_quads = num_online_nodes(); - - if (num_quads <= 1) - return; - - printk("Remapping cross-quad port I/O for %d quads\n", num_quads); - xquad_portio = ioremap(XQUAD_PORTIO_BASE, num_quads*XQUAD_PORTIO_QUAD); - printk("xquad_portio vaddr 0x%08lx, len %08lx\n", - (u_long) xquad_portio, (u_long) num_quads*XQUAD_PORTIO_QUAD); -} - -static inline int check_phys_apicid_present(int boot_cpu_physical_apicid) -{ - return (1); -} - -static inline void enable_apic_mode(void) -{ -} - -/* - * We use physical apicids here, not logical, so just return the default - * physical broadcast to stop people from breaking us - */ -static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) -{ - return (int) 0xF; -} - -/* No NUMA-Q box has a HT CPU, but it can't hurt to use the default code. */ -static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) -{ - return cpuid_apic >> index_msb; -} - -#endif /* ASM_X86__MACH_NUMAQ__MACH_APIC_H */ diff --git a/include/asm-x86/mach-numaq/mach_apicdef.h b/include/asm-x86/mach-numaq/mach_apicdef.h deleted file mode 100644 index f870ec5..0000000 --- a/include/asm-x86/mach-numaq/mach_apicdef.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef ASM_X86__MACH_NUMAQ__MACH_APICDEF_H -#define ASM_X86__MACH_NUMAQ__MACH_APICDEF_H - - -#define APIC_ID_MASK (0xF<<24) - -static inline unsigned get_apic_id(unsigned long x) -{ - return (((x)>>24)&0x0F); -} - -#define GET_APIC_ID(x) get_apic_id(x) - -#endif /* ASM_X86__MACH_NUMAQ__MACH_APICDEF_H */ diff --git a/include/asm-x86/mach-numaq/mach_ipi.h b/include/asm-x86/mach-numaq/mach_ipi.h deleted file mode 100644 index 1e83582..0000000 --- a/include/asm-x86/mach-numaq/mach_ipi.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef ASM_X86__MACH_NUMAQ__MACH_IPI_H -#define ASM_X86__MACH_NUMAQ__MACH_IPI_H - -void send_IPI_mask_sequence(cpumask_t, int vector); - -static inline void send_IPI_mask(cpumask_t mask, int vector) -{ - send_IPI_mask_sequence(mask, vector); -} - -static inline void send_IPI_allbutself(int vector) -{ - cpumask_t mask = cpu_online_map; - cpu_clear(smp_processor_id(), mask); - - if (!cpus_empty(mask)) - send_IPI_mask(mask, vector); -} - -static inline void send_IPI_all(int vector) -{ - send_IPI_mask(cpu_online_map, vector); -} - -#endif /* ASM_X86__MACH_NUMAQ__MACH_IPI_H */ diff --git a/include/asm-x86/mach-numaq/mach_mpparse.h b/include/asm-x86/mach-numaq/mach_mpparse.h deleted file mode 100644 index 74ade18..0000000 --- a/include/asm-x86/mach-numaq/mach_mpparse.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef ASM_X86__MACH_NUMAQ__MACH_MPPARSE_H -#define ASM_X86__MACH_NUMAQ__MACH_MPPARSE_H - -extern void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, - char *productid); - -#endif /* ASM_X86__MACH_NUMAQ__MACH_MPPARSE_H */ diff --git a/include/asm-x86/mach-numaq/mach_wakecpu.h b/include/asm-x86/mach-numaq/mach_wakecpu.h deleted file mode 100644 index 0db8cea..0000000 --- a/include/asm-x86/mach-numaq/mach_wakecpu.h +++ /dev/null @@ -1,43 +0,0 @@ -#ifndef ASM_X86__MACH_NUMAQ__MACH_WAKECPU_H -#define ASM_X86__MACH_NUMAQ__MACH_WAKECPU_H - -/* This file copes with machines that wakeup secondary CPUs by NMIs */ - -#define WAKE_SECONDARY_VIA_NMI - -#define TRAMPOLINE_LOW phys_to_virt(0x8) -#define TRAMPOLINE_HIGH phys_to_virt(0xa) - -#define boot_cpu_apicid boot_cpu_logical_apicid - -/* We don't do anything here because we use NMI's to boot instead */ -static inline void wait_for_init_deassert(atomic_t *deassert) -{ -} - -/* - * Because we use NMIs rather than the INIT-STARTUP sequence to - * bootstrap the CPUs, the APIC may be in a weird state. Kick it. - */ -static inline void smp_callin_clear_local_apic(void) -{ - clear_local_APIC(); -} - -static inline void store_NMI_vector(unsigned short *high, unsigned short *low) -{ - printk("Storing NMI vector\n"); - *high = *((volatile unsigned short *) TRAMPOLINE_HIGH); - *low = *((volatile unsigned short *) TRAMPOLINE_LOW); -} - -static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) -{ - printk("Restoring NMI vector\n"); - *((volatile unsigned short *) TRAMPOLINE_HIGH) = *high; - *((volatile unsigned short *) TRAMPOLINE_LOW) = *low; -} - -#define inquire_remote_apic(apicid) {} - -#endif /* ASM_X86__MACH_NUMAQ__MACH_WAKECPU_H */ diff --git a/include/asm-x86/numaq/apic.h b/include/asm-x86/numaq/apic.h new file mode 100644 index 0000000..a8344ba --- /dev/null +++ b/include/asm-x86/numaq/apic.h @@ -0,0 +1,138 @@ +#ifndef __ASM_NUMAQ_APIC_H +#define __ASM_NUMAQ_APIC_H + +#include +#include +#include + +#define APIC_DFR_VALUE (APIC_DFR_CLUSTER) + +static inline cpumask_t target_cpus(void) +{ + return CPU_MASK_ALL; +} + +#define TARGET_CPUS (target_cpus()) + +#define NO_BALANCE_IRQ (1) +#define esr_disable (1) + +#define INT_DELIVERY_MODE dest_LowestPrio +#define INT_DEST_MODE 0 /* physical delivery on LOCAL quad */ + +static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) +{ + return physid_isset(apicid, bitmap); +} +static inline unsigned long check_apicid_present(int bit) +{ + return physid_isset(bit, phys_cpu_present_map); +} +#define apicid_cluster(apicid) (apicid & 0xF0) + +static inline int apic_id_registered(void) +{ + return 1; +} + +static inline void init_apic_ldr(void) +{ + /* Already done in NUMA-Q firmware */ +} + +static inline void setup_apic_routing(void) +{ + printk("Enabling APIC mode: %s. Using %d I/O APICs\n", + "NUMA-Q", nr_ioapics); +} + +/* + * Skip adding the timer int on secondary nodes, which causes + * a small but painful rift in the time-space continuum. + */ +static inline int multi_timer_check(int apic, int irq) +{ + return apic != 0 && irq == 0; +} + +static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_map) +{ + /* We don't have a good way to do this yet - hack */ + return physids_promote(0xFUL); +} + +/* Mapping from cpu number to logical apicid */ +extern u8 cpu_2_logical_apicid[]; +static inline int cpu_to_logical_apicid(int cpu) +{ + if (cpu >= NR_CPUS) + return BAD_APICID; + return (int)cpu_2_logical_apicid[cpu]; +} + +/* + * Supporting over 60 cpus on NUMA-Q requires a locality-dependent + * cpu to APIC ID relation to properly interact with the intelligent + * mode of the cluster controller. + */ +static inline int cpu_present_to_apicid(int mps_cpu) +{ + if (mps_cpu < 60) + return ((mps_cpu >> 2) << 4) | (1 << (mps_cpu & 0x3)); + else + return BAD_APICID; +} + +static inline int apicid_to_node(int logical_apicid) +{ + return logical_apicid >> 4; +} + +static inline physid_mask_t apicid_to_cpu_present(int logical_apicid) +{ + int node = apicid_to_node(logical_apicid); + int cpu = __ffs(logical_apicid & 0xf); + + return physid_mask_of_physid(cpu + 4*node); +} + +extern void *xquad_portio; + +static inline void setup_portio_remap(void) +{ + int num_quads = num_online_nodes(); + + if (num_quads <= 1) + return; + + printk("Remapping cross-quad port I/O for %d quads\n", num_quads); + xquad_portio = ioremap(XQUAD_PORTIO_BASE, num_quads*XQUAD_PORTIO_QUAD); + printk("xquad_portio vaddr 0x%08lx, len %08lx\n", + (u_long) xquad_portio, (u_long) num_quads*XQUAD_PORTIO_QUAD); +} + +static inline int check_phys_apicid_present(int boot_cpu_physical_apicid) +{ + return (1); +} + +static inline void enable_apic_mode(void) +{ +} + +/* + * We use physical apicids here, not logical, so just return the default + * physical broadcast to stop people from breaking us + */ +static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) +{ + return (int) 0xF; +} + +/* No NUMA-Q box has a HT CPU, but it can't hurt to use the default code. */ +static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) +{ + return cpuid_apic >> index_msb; +} + +#endif /* __ASM_NUMAQ_APIC_H */ diff --git a/include/asm-x86/numaq/apicdef.h b/include/asm-x86/numaq/apicdef.h new file mode 100644 index 0000000..e012a46 --- /dev/null +++ b/include/asm-x86/numaq/apicdef.h @@ -0,0 +1,14 @@ +#ifndef __ASM_NUMAQ_APICDEF_H +#define __ASM_NUMAQ_APICDEF_H + + +#define APIC_ID_MASK (0xF<<24) + +static inline unsigned get_apic_id(unsigned long x) +{ + return (((x)>>24)&0x0F); +} + +#define GET_APIC_ID(x) get_apic_id(x) + +#endif diff --git a/include/asm-x86/numaq/ipi.h b/include/asm-x86/numaq/ipi.h new file mode 100644 index 0000000..935588d2 --- /dev/null +++ b/include/asm-x86/numaq/ipi.h @@ -0,0 +1,25 @@ +#ifndef __ASM_NUMAQ_IPI_H +#define __ASM_NUMAQ_IPI_H + +void send_IPI_mask_sequence(cpumask_t, int vector); + +static inline void send_IPI_mask(cpumask_t mask, int vector) +{ + send_IPI_mask_sequence(mask, vector); +} + +static inline void send_IPI_allbutself(int vector) +{ + cpumask_t mask = cpu_online_map; + cpu_clear(smp_processor_id(), mask); + + if (!cpus_empty(mask)) + send_IPI_mask(mask, vector); +} + +static inline void send_IPI_all(int vector) +{ + send_IPI_mask(cpu_online_map, vector); +} + +#endif /* __ASM_NUMAQ_IPI_H */ diff --git a/include/asm-x86/numaq/mpparse.h b/include/asm-x86/numaq/mpparse.h new file mode 100644 index 0000000..252292e --- /dev/null +++ b/include/asm-x86/numaq/mpparse.h @@ -0,0 +1,7 @@ +#ifndef __ASM_NUMAQ_MPPARSE_H +#define __ASM_NUMAQ_MPPARSE_H + +extern void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, + char *productid); + +#endif /* __ASM_NUMAQ_MPPARSE_H */ diff --git a/include/asm-x86/numaq/wakecpu.h b/include/asm-x86/numaq/wakecpu.h new file mode 100644 index 0000000..c577bda --- /dev/null +++ b/include/asm-x86/numaq/wakecpu.h @@ -0,0 +1,43 @@ +#ifndef __ASM_NUMAQ_WAKECPU_H +#define __ASM_NUMAQ_WAKECPU_H + +/* This file copes with machines that wakeup secondary CPUs by NMIs */ + +#define WAKE_SECONDARY_VIA_NMI + +#define TRAMPOLINE_LOW phys_to_virt(0x8) +#define TRAMPOLINE_HIGH phys_to_virt(0xa) + +#define boot_cpu_apicid boot_cpu_logical_apicid + +/* We don't do anything here because we use NMI's to boot instead */ +static inline void wait_for_init_deassert(atomic_t *deassert) +{ +} + +/* + * Because we use NMIs rather than the INIT-STARTUP sequence to + * bootstrap the CPUs, the APIC may be in a weird state. Kick it. + */ +static inline void smp_callin_clear_local_apic(void) +{ + clear_local_APIC(); +} + +static inline void store_NMI_vector(unsigned short *high, unsigned short *low) +{ + printk("Storing NMI vector\n"); + *high = *((volatile unsigned short *) TRAMPOLINE_HIGH); + *low = *((volatile unsigned short *) TRAMPOLINE_LOW); +} + +static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) +{ + printk("Restoring NMI vector\n"); + *((volatile unsigned short *) TRAMPOLINE_HIGH) = *high; + *((volatile unsigned short *) TRAMPOLINE_LOW) = *low; +} + +#define inquire_remote_apic(apicid) {} + +#endif /* __ASM_NUMAQ_WAKECPU_H */ -- cgit v0.10.2 From 39eacc20f93614f7bab63eb1d45060503afc46d0 Mon Sep 17 00:00:00 2001 From: Huang Weiyi Date: Fri, 25 Jul 2008 23:30:13 +0800 Subject: arch/x86/kernel/visws_quirks.c: Removed duplicated #include Removed duplicated #include in arch/x86/kernel/visws_quirks.c. asm/apic.h asm/arch_hooks.h asm/io.h asm/visws/cobalt.h asm/visws/lithium.h asm/visws/piix4.h linux/init.h linux/interrupt.h linux/smp.h Signed-off-by: Huang Weiyi Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index 41e01b1..0c75691b 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c @@ -29,41 +29,26 @@ #include #include #include -#include #include #include #include "mach_apic.h" -#include -#include - #include -#include -#include -#include -#include #include #include -#include #include -#include #include #include -#include #include #include extern int no_broadcast; -#include #include -#include -#include -#include char visws_board_type = -1; char visws_board_rev = -1; -- cgit v0.10.2 From 9749986a878e91182ff027ff0010ab8e3211031a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 26 Jul 2008 17:28:11 +0200 Subject: x86: usb debug port early console, fix fix: arch/x86/kernel/built-in.o: In function `nvidia_set_debug_port': early_printk.c:(.text+0xf8b1): undefined reference to `read_pci_config' early_printk.c:(.text+0xf8dc): undefined reference to `write_pci_config' arch/x86/kernel/built-in.o: In function `setup_early_printk': early_printk.c:(.init.text+0x5487): undefined reference to `early_pci_allowed' early_printk.c:(.init.text+0x54cb): undefined reference to `read_pci_config' early_printk.c:(.init.text+0x54ec): undefined reference to `read_pci_config_16' [...] Signed-off-by: Ingo Molnar diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 93422d2..2a3dfbd 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -46,7 +46,7 @@ config EARLY_PRINTK config EARLY_PRINTK_DBGP bool "Early printk via EHCI debug port" default n - depends on EARLY_PRINTK + depends on EARLY_PRINTK && PCI help Write kernel log output directly into the EHCI debug port. -- cgit v0.10.2 From b56afe1d41653fb07ab1b5af5ccc12001c4dd5a0 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Thu, 24 Jul 2008 12:15:45 -0300 Subject: x86, xen: Use native_pte_flags instead of native_pte_val for .pte_flags Using native_pte_val triggers the BUG_ON() in the paravirt_ops version of pte_flags(). Signed-off-by: Eduardo Habkost Acked-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 06219e6..e2767c2 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1347,7 +1347,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { .ptep_modify_prot_commit = __ptep_modify_prot_commit, .pte_val = xen_pte_val, - .pte_flags = native_pte_val, + .pte_flags = native_pte_flags, .pgd_val = xen_pgd_val, .make_pte = xen_make_pte, -- cgit v0.10.2 From 3964cd3a6721f18ef1dd67b9a0a89dc5b36683b9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 26 Jul 2008 19:35:20 +0200 Subject: x86: visws_quirks, fix build error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix: arch/x86/kernel/visws_quirks.c: In function ‘visws_early_detect’: arch/x86/kernel/visws_quirks.c:290: error: ‘skip_ioapic_setup’ undeclared (first use in this function) arch/x86/kernel/visws_quirks.c:290: error: (Each undeclared identifier is reported only once arch/x86/kernel/visws_quirks.c:290: error: for each function it appears in.) Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index 0c75691b..3059eb4 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include -- cgit v0.10.2 From 36a033082b5243d45d508c5ccd47a754edbc6821 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Fri, 14 Mar 2008 17:46:38 -0700 Subject: x86: tracehook_signal_handler This makes the x86 signal handling code use tracehook_signal_handler() in place of calling into ptrace guts. The call is moved after the sa_mask processing, but there is no other change. This cleanup doesn't matter to existing debuggers, but is the sensible thing: have all facets of the handler setup complete before the debugger inspects the task again. Signed-off-by: Roland McGrath diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 6fb5bcd..22aae16 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -558,8 +559,6 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, * handler too. */ regs->flags &= ~X86_EFLAGS_TF; - if (test_thread_flag(TIF_SINGLESTEP)) - ptrace_notify(SIGTRAP); spin_lock_irq(¤t->sighand->siglock); sigorsets(¤t->blocked, ¤t->blocked, &ka->sa.sa_mask); @@ -568,6 +567,9 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); + tracehook_signal_handler(sig, info, ka, regs, + test_thread_flag(TIF_SINGLESTEP)); + return 0; } diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index b45ef8d..3beb2db 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -444,8 +445,6 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, * handler too. */ regs->flags &= ~X86_EFLAGS_TF; - if (test_thread_flag(TIF_SINGLESTEP)) - ptrace_notify(SIGTRAP); spin_lock_irq(¤t->sighand->siglock); sigorsets(¤t->blocked,¤t->blocked,&ka->sa.sa_mask); @@ -453,6 +452,9 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, sigaddset(¤t->blocked,sig); recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); + + tracehook_signal_handler(sig, info, ka, regs, + test_thread_flag(TIF_SINGLESTEP)); } return ret; -- cgit v0.10.2 From eeea3c3ff8af7f6960a0515d46dff6479bdb91f9 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Sun, 16 Mar 2008 23:36:28 -0700 Subject: x86: tracehook syscall This changes x86 syscall tracing to use the new tracehook.h entry points. There is no change, only cleanup. Signed-off-by: Roland McGrath diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index e37dccc..19a7d2c 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -1375,30 +1376,6 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) force_sig_info(SIGTRAP, &info, tsk); } -static void syscall_trace(struct pt_regs *regs) -{ - if (!(current->ptrace & PT_PTRACED)) - return; - -#if 0 - printk("trace %s ip %lx sp %lx ax %d origrax %d caller %lx tiflags %x ptrace %x\n", - current->comm, - regs->ip, regs->sp, regs->ax, regs->orig_ax, __builtin_return_address(0), - current_thread_info()->flags, current->ptrace); -#endif - - ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) - ? 0x80 : 0)); - /* - * this isn't the same as continuing with a signal, but it will do - * for normal use. strace only continues with a signal if the - * stopping signal is not SIGTRAP. -brl - */ - if (current->exit_code) { - send_sig(current->exit_code, current, 1); - current->exit_code = 0; - } -} #ifdef CONFIG_X86_32 # define IS_IA32 1 @@ -1432,8 +1409,9 @@ asmregparm long syscall_trace_enter(struct pt_regs *regs) if (unlikely(test_thread_flag(TIF_SYSCALL_EMU))) ret = -1L; - if (ret || test_thread_flag(TIF_SYSCALL_TRACE)) - syscall_trace(regs); + if ((ret || test_thread_flag(TIF_SYSCALL_TRACE)) && + tracehook_report_syscall_entry(regs)) + ret = -1L; if (unlikely(current->audit_context)) { if (IS_IA32) @@ -1459,7 +1437,7 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs) audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); if (test_thread_flag(TIF_SYSCALL_TRACE)) - syscall_trace(regs); + tracehook_report_syscall_exit(regs, 0); /* * If TIF_SYSCALL_EMU is set, we only get here because of @@ -1475,6 +1453,6 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs) * system call instruction. */ if (test_thread_flag(TIF_SINGLESTEP) && - (current->ptrace & PT_PTRACED)) + tracehook_consider_fatal_signal(current, SIGTRAP, SIG_DFL)) send_sigtrap(current, regs, 0); } -- cgit v0.10.2 From 68bd0f4ef7750fc277e1268bf40f443898382409 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Fri, 18 Apr 2008 17:08:44 -0700 Subject: x86: tracehook: asm/syscall.h Add asm/syscall.h for x86 with all the required entry points. This will allow arch-independent tracing code for system calls. Signed-off-by: Roland McGrath diff --git a/include/asm-x86/ptrace.h b/include/asm-x86/ptrace.h index 8a71db8..91a77f5 100644 --- a/include/asm-x86/ptrace.h +++ b/include/asm-x86/ptrace.h @@ -213,6 +213,11 @@ static inline unsigned long frame_pointer(struct pt_regs *regs) return regs->bp; } +static inline unsigned long user_stack_pointer(struct pt_regs *regs) +{ + return regs->sp; +} + /* * These are defined as per linux/ptrace.h, which see. */ diff --git a/include/asm-x86/syscall.h b/include/asm-x86/syscall.h new file mode 100644 index 0000000..6f29389 --- /dev/null +++ b/include/asm-x86/syscall.h @@ -0,0 +1,210 @@ +/* + * Access to user system call parameters and results + * + * Copyright (C) 2008 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + * + * See asm-generic/syscall.h for descriptions of what we must do here. + */ + +#ifndef _ASM_SYSCALL_H +#define _ASM_SYSCALL_H 1 + +#include + +static inline long syscall_get_nr(struct task_struct *task, + struct pt_regs *regs) +{ + /* + * We always sign-extend a -1 value being set here, + * so this is always either -1L or a syscall number. + */ + return regs->orig_ax; +} + +static inline void syscall_rollback(struct task_struct *task, + struct pt_regs *regs) +{ + regs->ax = regs->orig_ax; +} + +static inline long syscall_get_error(struct task_struct *task, + struct pt_regs *regs) +{ + unsigned long error = regs->ax; +#ifdef CONFIG_IA32_EMULATION + /* + * TS_COMPAT is set for 32-bit syscall entries and then + * remains set until we return to user mode. + */ + if (task_thread_info(task)->status & TS_COMPAT) + /* + * Sign-extend the value so (int)-EFOO becomes (long)-EFOO + * and will match correctly in comparisons. + */ + error = (long) (int) error; +#endif + return error >= -4095L ? error : 0; +} + +static inline long syscall_get_return_value(struct task_struct *task, + struct pt_regs *regs) +{ + return regs->ax; +} + +static inline void syscall_set_return_value(struct task_struct *task, + struct pt_regs *regs, + int error, long val) +{ + regs->ax = (long) error ?: val; +} + +#ifdef CONFIG_X86_32 + +static inline void syscall_get_arguments(struct task_struct *task, + struct pt_regs *regs, + unsigned int i, unsigned int n, + unsigned long *args) +{ + BUG_ON(i + n > 6); + memcpy(args, ®s->bx + i, n * sizeof(args[0])); +} + +static inline void syscall_set_arguments(struct task_struct *task, + struct pt_regs *regs, + unsigned int i, unsigned int n, + const unsigned long *args) +{ + BUG_ON(i + n > 6); + memcpy(®s->bx + i, args, n * sizeof(args[0])); +} + +#else /* CONFIG_X86_64 */ + +static inline void syscall_get_arguments(struct task_struct *task, + struct pt_regs *regs, + unsigned int i, unsigned int n, + unsigned long *args) +{ +# ifdef CONFIG_IA32_EMULATION + if (task_thread_info(task)->status & TS_COMPAT) + switch (i + n) { + case 6: + if (!n--) break; + *args++ = regs->bp; + case 5: + if (!n--) break; + *args++ = regs->di; + case 4: + if (!n--) break; + *args++ = regs->si; + case 3: + if (!n--) break; + *args++ = regs->dx; + case 2: + if (!n--) break; + *args++ = regs->cx; + case 1: + if (!n--) break; + *args++ = regs->bx; + case 0: + if (!n--) break; + default: + BUG(); + break; + } + else +# endif + switch (i + n) { + case 6: + if (!n--) break; + *args++ = regs->r9; + case 5: + if (!n--) break; + *args++ = regs->r8; + case 4: + if (!n--) break; + *args++ = regs->r10; + case 3: + if (!n--) break; + *args++ = regs->dx; + case 2: + if (!n--) break; + *args++ = regs->si; + case 1: + if (!n--) break; + *args++ = regs->di; + case 0: + if (!n--) break; + default: + BUG(); + break; + } +} + +static inline void syscall_set_arguments(struct task_struct *task, + struct pt_regs *regs, + unsigned int i, unsigned int n, + const unsigned long *args) +{ +# ifdef CONFIG_IA32_EMULATION + if (task_thread_info(task)->status & TS_COMPAT) + switch (i + n) { + case 6: + if (!n--) break; + regs->bp = *args++; + case 5: + if (!n--) break; + regs->di = *args++; + case 4: + if (!n--) break; + regs->si = *args++; + case 3: + if (!n--) break; + regs->dx = *args++; + case 2: + if (!n--) break; + regs->cx = *args++; + case 1: + if (!n--) break; + regs->bx = *args++; + case 0: + if (!n--) break; + default: + BUG(); + } + else +# endif + switch (i + n) { + case 6: + if (!n--) break; + regs->r9 = *args++; + case 5: + if (!n--) break; + regs->r8 = *args++; + case 4: + if (!n--) break; + regs->r10 = *args++; + case 3: + if (!n--) break; + regs->dx = *args++; + case 2: + if (!n--) break; + regs->si = *args++; + case 1: + if (!n--) break; + regs->di = *args++; + case 0: + if (!n--) break; + default: + BUG(); + } +} + +#endif /* CONFIG_X86_32 */ + +#endif /* _ASM_SYSCALL_H */ -- cgit v0.10.2 From 4dfcbb997aa9f3a6a3ed8c192f0dac28b027e08f Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Sat, 19 Apr 2008 15:37:09 -0700 Subject: x86 signals: use asm/syscall.h Replace local inlines with the asm/syscall.h interfaces that do the same things. Signed-off-by: Roland McGrath diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index 3beb2db..cb7cf02 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -27,6 +27,7 @@ #include #include #include +#include #include "sigframe.h" #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) @@ -347,35 +348,6 @@ give_sigsegv: } /* - * Return -1L or the syscall number that @regs is executing. - */ -static long current_syscall(struct pt_regs *regs) -{ - /* - * We always sign-extend a -1 value being set here, - * so this is always either -1L or a syscall number. - */ - return regs->orig_ax; -} - -/* - * Return a value that is -EFOO if the system call in @regs->orig_ax - * returned an error. This only works for @regs from @current. - */ -static long current_syscall_ret(struct pt_regs *regs) -{ -#ifdef CONFIG_IA32_EMULATION - if (test_thread_flag(TIF_IA32)) - /* - * Sign-extend the value so (int)-EFOO becomes (long)-EFOO - * and will match correctly in comparisons. - */ - return (int) regs->ax; -#endif - return regs->ax; -} - -/* * OK, we're invoking a handler */ @@ -386,9 +358,9 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, int ret; /* Are we from a system call? */ - if (current_syscall(regs) >= 0) { + if (syscall_get_nr(current, regs) >= 0) { /* If so, check system call restarting.. */ - switch (current_syscall_ret(regs)) { + switch (syscall_get_error(current, regs)) { case -ERESTART_RESTARTBLOCK: case -ERESTARTNOHAND: regs->ax = -EINTR; @@ -511,9 +483,9 @@ static void do_signal(struct pt_regs *regs) } /* Did we come from a system call? */ - if (current_syscall(regs) >= 0) { + if (syscall_get_nr(current, regs) >= 0) { /* Restart the system call - no handlers present */ - switch (current_syscall_ret(regs)) { + switch (syscall_get_error(current, regs)) { case -ERESTARTNOHAND: case -ERESTARTSYS: case -ERESTARTNOINTR: -- cgit v0.10.2 From 59e52130f04537d2c80ea44bb007cadd1ad29543 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Sat, 19 Apr 2008 19:10:57 -0700 Subject: x86: tracehook: TIF_NOTIFY_RESUME This adds TIF_NOTIFY_RESUME support for x86, both 64-bit and 32-bit. When set, we call tracehook_notify_resume() on the way to user mode. Signed-off-by: Roland McGrath diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 22aae16..4445d26 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -663,5 +663,10 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) if (thread_info_flags & _TIF_SIGPENDING) do_signal(regs); + if (thread_info_flags & _TIF_NOTIFY_RESUME) { + clear_thread_flag(TIF_NOTIFY_RESUME); + tracehook_notify_resume(regs); + } + clear_thread_flag(TIF_IRET); } diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index cb7cf02..d01e3f6 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -523,6 +523,11 @@ void do_notify_resume(struct pt_regs *regs, void *unused, /* deal with pending signal delivery */ if (thread_info_flags & _TIF_SIGPENDING) do_signal(regs); + + if (thread_info_flags & _TIF_NOTIFY_RESUME) { + clear_thread_flag(TIF_NOTIFY_RESUME); + tracehook_notify_resume(regs); + } } void signal_fault(struct pt_regs *regs, void __user *frame, char *where) diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h index da0a675..4cd5b7b 100644 --- a/include/asm-x86/thread_info.h +++ b/include/asm-x86/thread_info.h @@ -71,6 +71,7 @@ struct thread_info { * Warning: layout of LSW is hardcoded in entry.S */ #define TIF_SYSCALL_TRACE 0 /* syscall trace active */ +#define TIF_NOTIFY_RESUME 1 /* callback before returning to user */ #define TIF_SIGPENDING 2 /* signal pending */ #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ #define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/ @@ -93,6 +94,7 @@ struct thread_info { #define TIF_BTS_TRACE_TS 27 /* record scheduling event timestamps */ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) +#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) @@ -133,7 +135,7 @@ struct thread_info { /* Only used for 64 bit */ #define _TIF_DO_NOTIFY_MASK \ - (_TIF_SIGPENDING|_TIF_MCE_NOTIFY) + (_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_NOTIFY_RESUME) /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ -- cgit v0.10.2 From 99bbc4b1e677ac695431e8d9c8e710ef391c567f Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Sun, 20 Apr 2008 14:35:12 -0700 Subject: x86: tracehook: CONFIG_HAVE_ARCH_TRACEHOOK The x86 arch code has all the prerequisites, so set HAVE_ARCH_TRACEHOOK. Signed-off-by: Roland McGrath diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index b6fa287..f463a8a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -30,6 +30,7 @@ config X86 select HAVE_FTRACE select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) select HAVE_ARCH_KGDB if !X86_VOYAGER + select HAVE_ARCH_TRACEHOOK select HAVE_EFFICIENT_UNALIGNED_ACCESS config ARCH_DEFCONFIG -- cgit v0.10.2 From d25ae38b7e005af03843833bbd811ffe8c5f8cb4 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 25 Jul 2008 19:39:03 -0700 Subject: x86: add apic probe for genapic 64bit - fix intr_remapping_enabled get assigned later, so need to check that in setup_apic_routing Signed-off-by: Yinghai Lu Cc: Jack Steiner Cc: Suresh Siddha Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index b3ba969..6c9bfc9 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -42,6 +43,11 @@ static struct genapic *apic_probe[] __initdata = { */ void __init setup_apic_routing(void) { + if (genapic == &apic_x2apic_phys || genapic == &apic_x2apic_cluster) { + if (!intr_remapping_enabled) + genapic = &apic_flat; + } + if (genapic == &apic_flat) { if (max_physical_apicid >= 8) genapic = &apic_physflat; diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index ef3f318..fed9f68 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -14,7 +14,7 @@ DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid); static int __init x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { - if (cpu_has_x2apic && intr_remapping_enabled) + if (cpu_has_x2apic) return 1; return 0; diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index 3229c68..958d537 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -21,7 +21,7 @@ early_param("x2apic_phys", set_x2apic_phys_mode); static int __init x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { - if (cpu_has_x2apic && intr_remapping_enabled && x2apic_phys) + if (cpu_has_x2apic && x2apic_phys) return 1; return 0; -- cgit v0.10.2 From 64f53a0492b4bc11868307990bb8f7c1e0764f89 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sun, 27 Jul 2008 08:42:32 -0700 Subject: x86: fix initialization of 'l' bit in ldt descriptors Make sure that fill_ldt() initializes the 'l' bit in the descriptor. It always sets it to 0, ignoring 'lm' in user_desc, preserving original x86_64 behaviour. Previously it was leaving 'l' uninitialized. Signed-off-by: Jeremy Fitzhardinge Cc: Glauber de Oliveira Costa Signed-off-by: Ingo Molnar Cc: Glauber de Oliveira Costa diff --git a/include/asm-x86/desc.h b/include/asm-x86/desc.h index 24a524f..06f786f 100644 --- a/include/asm-x86/desc.h +++ b/include/asm-x86/desc.h @@ -24,6 +24,11 @@ static inline void fill_ldt(struct desc_struct *desc, desc->d = info->seg_32bit; desc->g = info->limit_in_pages; desc->base2 = (info->base_addr & 0xff000000) >> 24; + /* + * Don't allow setting of the lm bit. It is useless anyway + * because 64bit system calls require __USER_CS: + */ + desc->l = 0; } extern struct desc_ptr idt_descr; -- cgit v0.10.2 From a05d2ebab28011c2f3f520833f4bfdd2fd1b9c02 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sun, 27 Jul 2008 08:45:02 -0700 Subject: xen: fix allocation and use of large ldts When the ldt gets to more than 1 page in size, the kernel uses vmalloc to allocate it. This means that: - when making the ldt RO, we must update the pages in both the vmalloc mapping and the linear mapping to make sure there are no RW aliases. - we need to use arbitrary_virt_to_machine to compute the machine addr for each update Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index e2767c2..b011e4a 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -325,24 +325,55 @@ static unsigned long xen_store_tr(void) return 0; } +/* + * If 'v' is a vmalloc mapping, then find the linear mapping of the + * page (if any) and also set its protections to match: + */ +static void set_aliased_prot(void *v, pgprot_t prot) +{ + int level; + pte_t *ptep; + pte_t pte; + unsigned long pfn; + struct page *page; + + ptep = lookup_address((unsigned long)v, &level); + BUG_ON(ptep == NULL); + + pfn = pte_pfn(*ptep); + page = pfn_to_page(pfn); + + pte = pfn_pte(pfn, prot); + + if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0)) + BUG(); + + if (!PageHighMem(page)) { + void *av = __va(PFN_PHYS(pfn)); + + if (av != v) + if (HYPERVISOR_update_va_mapping((unsigned long)av, pte, 0)) + BUG(); + } else + kmap_flush_unused(); +} + static void xen_alloc_ldt(struct desc_struct *ldt, unsigned entries) { - unsigned pages = roundup(entries * LDT_ENTRY_SIZE, PAGE_SIZE); - void *v = ldt; + const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE; int i; - for(i = 0; i < pages; i += PAGE_SIZE) - make_lowmem_page_readonly(v + i); + for(i = 0; i < entries; i += entries_per_page) + set_aliased_prot(ldt + i, PAGE_KERNEL_RO); } static void xen_free_ldt(struct desc_struct *ldt, unsigned entries) { - unsigned pages = roundup(entries * LDT_ENTRY_SIZE, PAGE_SIZE); - void *v = ldt; + const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE; int i; - for(i = 0; i < pages; i += PAGE_SIZE) - make_lowmem_page_readwrite(v + i); + for(i = 0; i < entries; i += entries_per_page) + set_aliased_prot(ldt + i, PAGE_KERNEL); } static void xen_set_ldt(const void *addr, unsigned entries) @@ -446,7 +477,7 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum, const void *ptr) { unsigned long lp = (unsigned long)&dt[entrynum]; - xmaddr_t mach_lp = virt_to_machine(lp); + xmaddr_t mach_lp = arbitrary_virt_to_machine(lp); u64 entry = *(u64 *)ptr; preempt_disable(); @@ -579,7 +610,7 @@ static void xen_write_gdt_entry(struct desc_struct *dt, int entry, } static void xen_load_sp0(struct tss_struct *tss, - struct thread_struct *thread) + struct thread_struct *thread) { struct multicall_space mcs = xen_mc_entry(0); MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0); -- cgit v0.10.2 From 7225e75144b9718cbbe1820d9c011c809d5773fd Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 26 Jul 2008 17:54:22 -0700 Subject: documentation: move mtrr.txt to Doc/x86/ subdir Move mtrr.txt to the Documentation/x86/ subdirectory. Add 00-INDEX to the Documentation/x86/ subdirectory. Signed-off-by: Randy Dunlap Cc: Adrian Bunk Signed-off-by: Ingo Molnar diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX index 1977fab..661b6cc 100644 --- a/Documentation/00-INDEX +++ b/Documentation/00-INDEX @@ -253,8 +253,6 @@ mono.txt - how to execute Mono-based .NET binaries with the help of BINFMT_MISC. moxa-smartio - file with info on installing/using Moxa multiport serial driver. -mtrr.txt - - how to use PPro Memory Type Range Registers to increase performance. mutex-design.txt - info on the generic mutex subsystem. namespaces/ diff --git a/Documentation/mtrr.txt b/Documentation/mtrr.txt deleted file mode 100644 index c39ac39..0000000 --- a/Documentation/mtrr.txt +++ /dev/null @@ -1,305 +0,0 @@ -MTRR (Memory Type Range Register) control -3 Jun 1999 -Richard Gooch - - - On Intel P6 family processors (Pentium Pro, Pentium II and later) - the Memory Type Range Registers (MTRRs) may be used to control - processor access to memory ranges. This is most useful when you have - a video (VGA) card on a PCI or AGP bus. Enabling write-combining - allows bus write transfers to be combined into a larger transfer - before bursting over the PCI/AGP bus. This can increase performance - of image write operations 2.5 times or more. - - The Cyrix 6x86, 6x86MX and M II processors have Address Range - Registers (ARRs) which provide a similar functionality to MTRRs. For - these, the ARRs are used to emulate the MTRRs. - - The AMD K6-2 (stepping 8 and above) and K6-3 processors have two - MTRRs. These are supported. The AMD Athlon family provide 8 Intel - style MTRRs. - - The Centaur C6 (WinChip) has 8 MCRs, allowing write-combining. These - are supported. - - The VIA Cyrix III and VIA C3 CPUs offer 8 Intel style MTRRs. - - The CONFIG_MTRR option creates a /proc/mtrr file which may be used - to manipulate your MTRRs. Typically the X server should use - this. This should have a reasonably generic interface so that - similar control registers on other processors can be easily - supported. - - -There are two interfaces to /proc/mtrr: one is an ASCII interface -which allows you to read and write. The other is an ioctl() -interface. The ASCII interface is meant for administration. The -ioctl() interface is meant for C programs (i.e. the X server). The -interfaces are described below, with sample commands and C code. - -=============================================================================== -Reading MTRRs from the shell: - -% cat /proc/mtrr -reg00: base=0x00000000 ( 0MB), size= 128MB: write-back, count=1 -reg01: base=0x08000000 ( 128MB), size= 64MB: write-back, count=1 -=============================================================================== -Creating MTRRs from the C-shell: -# echo "base=0xf8000000 size=0x400000 type=write-combining" >! /proc/mtrr -or if you use bash: -# echo "base=0xf8000000 size=0x400000 type=write-combining" >| /proc/mtrr - -And the result thereof: -% cat /proc/mtrr -reg00: base=0x00000000 ( 0MB), size= 128MB: write-back, count=1 -reg01: base=0x08000000 ( 128MB), size= 64MB: write-back, count=1 -reg02: base=0xf8000000 (3968MB), size= 4MB: write-combining, count=1 - -This is for video RAM at base address 0xf8000000 and size 4 megabytes. To -find out your base address, you need to look at the output of your X -server, which tells you where the linear framebuffer address is. A -typical line that you may get is: - -(--) S3: PCI: 968 rev 0, Linear FB @ 0xf8000000 - -Note that you should only use the value from the X server, as it may -move the framebuffer base address, so the only value you can trust is -that reported by the X server. - -To find out the size of your framebuffer (what, you don't actually -know?), the following line will tell you: - -(--) S3: videoram: 4096k - -That's 4 megabytes, which is 0x400000 bytes (in hexadecimal). -A patch is being written for XFree86 which will make this automatic: -in other words the X server will manipulate /proc/mtrr using the -ioctl() interface, so users won't have to do anything. If you use a -commercial X server, lobby your vendor to add support for MTRRs. -=============================================================================== -Creating overlapping MTRRs: - -%echo "base=0xfb000000 size=0x1000000 type=write-combining" >/proc/mtrr -%echo "base=0xfb000000 size=0x1000 type=uncachable" >/proc/mtrr - -And the results: cat /proc/mtrr -reg00: base=0x00000000 ( 0MB), size= 64MB: write-back, count=1 -reg01: base=0xfb000000 (4016MB), size= 16MB: write-combining, count=1 -reg02: base=0xfb000000 (4016MB), size= 4kB: uncachable, count=1 - -Some cards (especially Voodoo Graphics boards) need this 4 kB area -excluded from the beginning of the region because it is used for -registers. - -NOTE: You can only create type=uncachable region, if the first -region that you created is type=write-combining. -=============================================================================== -Removing MTRRs from the C-shell: -% echo "disable=2" >! /proc/mtrr -or using bash: -% echo "disable=2" >| /proc/mtrr -=============================================================================== -Reading MTRRs from a C program using ioctl()'s: - -/* mtrr-show.c - - Source file for mtrr-show (example program to show MTRRs using ioctl()'s) - - Copyright (C) 1997-1998 Richard Gooch - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - - Richard Gooch may be reached by email at rgooch@atnf.csiro.au - The postal address is: - Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia. -*/ - -/* - This program will use an ioctl() on /proc/mtrr to show the current MTRR - settings. This is an alternative to reading /proc/mtrr. - - - Written by Richard Gooch 17-DEC-1997 - - Last updated by Richard Gooch 2-MAY-1998 - - -*/ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define TRUE 1 -#define FALSE 0 -#define ERRSTRING strerror (errno) - -static char *mtrr_strings[MTRR_NUM_TYPES] = -{ - "uncachable", /* 0 */ - "write-combining", /* 1 */ - "?", /* 2 */ - "?", /* 3 */ - "write-through", /* 4 */ - "write-protect", /* 5 */ - "write-back", /* 6 */ -}; - -int main () -{ - int fd; - struct mtrr_gentry gentry; - - if ( ( fd = open ("/proc/mtrr", O_RDONLY, 0) ) == -1 ) - { - if (errno == ENOENT) - { - fputs ("/proc/mtrr not found: not supported or you don't have a PPro?\n", - stderr); - exit (1); - } - fprintf (stderr, "Error opening /proc/mtrr\t%s\n", ERRSTRING); - exit (2); - } - for (gentry.regnum = 0; ioctl (fd, MTRRIOC_GET_ENTRY, &gentry) == 0; - ++gentry.regnum) - { - if (gentry.size < 1) - { - fprintf (stderr, "Register: %u disabled\n", gentry.regnum); - continue; - } - fprintf (stderr, "Register: %u base: 0x%lx size: 0x%lx type: %s\n", - gentry.regnum, gentry.base, gentry.size, - mtrr_strings[gentry.type]); - } - if (errno == EINVAL) exit (0); - fprintf (stderr, "Error doing ioctl(2) on /dev/mtrr\t%s\n", ERRSTRING); - exit (3); -} /* End Function main */ -=============================================================================== -Creating MTRRs from a C programme using ioctl()'s: - -/* mtrr-add.c - - Source file for mtrr-add (example programme to add an MTRRs using ioctl()) - - Copyright (C) 1997-1998 Richard Gooch - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - - Richard Gooch may be reached by email at rgooch@atnf.csiro.au - The postal address is: - Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia. -*/ - -/* - This programme will use an ioctl() on /proc/mtrr to add an entry. The first - available mtrr is used. This is an alternative to writing /proc/mtrr. - - - Written by Richard Gooch 17-DEC-1997 - - Last updated by Richard Gooch 2-MAY-1998 - - -*/ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define TRUE 1 -#define FALSE 0 -#define ERRSTRING strerror (errno) - -static char *mtrr_strings[MTRR_NUM_TYPES] = -{ - "uncachable", /* 0 */ - "write-combining", /* 1 */ - "?", /* 2 */ - "?", /* 3 */ - "write-through", /* 4 */ - "write-protect", /* 5 */ - "write-back", /* 6 */ -}; - -int main (int argc, char **argv) -{ - int fd; - struct mtrr_sentry sentry; - - if (argc != 4) - { - fprintf (stderr, "Usage:\tmtrr-add base size type\n"); - exit (1); - } - sentry.base = strtoul (argv[1], NULL, 0); - sentry.size = strtoul (argv[2], NULL, 0); - for (sentry.type = 0; sentry.type < MTRR_NUM_TYPES; ++sentry.type) - { - if (strcmp (argv[3], mtrr_strings[sentry.type]) == 0) break; - } - if (sentry.type >= MTRR_NUM_TYPES) - { - fprintf (stderr, "Illegal type: \"%s\"\n", argv[3]); - exit (2); - } - if ( ( fd = open ("/proc/mtrr", O_WRONLY, 0) ) == -1 ) - { - if (errno == ENOENT) - { - fputs ("/proc/mtrr not found: not supported or you don't have a PPro?\n", - stderr); - exit (3); - } - fprintf (stderr, "Error opening /proc/mtrr\t%s\n", ERRSTRING); - exit (4); - } - if (ioctl (fd, MTRRIOC_ADD_ENTRY, &sentry) == -1) - { - fprintf (stderr, "Error doing ioctl(2) on /dev/mtrr\t%s\n", ERRSTRING); - exit (5); - } - fprintf (stderr, "Sleeping for 5 seconds so you can see the new entry\n"); - sleep (5); - close (fd); - fputs ("I've just closed /proc/mtrr so now the new entry should be gone\n", - stderr); -} /* End Function main */ -=============================================================================== diff --git a/Documentation/x86/00-INDEX b/Documentation/x86/00-INDEX new file mode 100644 index 0000000..dbe3377 --- /dev/null +++ b/Documentation/x86/00-INDEX @@ -0,0 +1,4 @@ +00-INDEX + - this file +mtrr.txt + - how to use x86 Memory Type Range Registers to increase performance diff --git a/Documentation/x86/mtrr.txt b/Documentation/x86/mtrr.txt new file mode 100644 index 0000000..cc071dc --- /dev/null +++ b/Documentation/x86/mtrr.txt @@ -0,0 +1,305 @@ +MTRR (Memory Type Range Register) control +3 Jun 1999 +Richard Gooch + + + On Intel P6 family processors (Pentium Pro, Pentium II and later) + the Memory Type Range Registers (MTRRs) may be used to control + processor access to memory ranges. This is most useful when you have + a video (VGA) card on a PCI or AGP bus. Enabling write-combining + allows bus write transfers to be combined into a larger transfer + before bursting over the PCI/AGP bus. This can increase performance + of image write operations 2.5 times or more. + + The Cyrix 6x86, 6x86MX and M II processors have Address Range + Registers (ARRs) which provide a similar functionality to MTRRs. For + these, the ARRs are used to emulate the MTRRs. + + The AMD K6-2 (stepping 8 and above) and K6-3 processors have two + MTRRs. These are supported. The AMD Athlon family provide 8 Intel + style MTRRs. + + The Centaur C6 (WinChip) has 8 MCRs, allowing write-combining. These + are supported. + + The VIA Cyrix III and VIA C3 CPUs offer 8 Intel style MTRRs. + + The CONFIG_MTRR option creates a /proc/mtrr file which may be used + to manipulate your MTRRs. Typically the X server should use + this. This should have a reasonably generic interface so that + similar control registers on other processors can be easily + supported. + + +There are two interfaces to /proc/mtrr: one is an ASCII interface +which allows you to read and write. The other is an ioctl() +interface. The ASCII interface is meant for administration. The +ioctl() interface is meant for C programs (i.e. the X server). The +interfaces are described below, with sample commands and C code. + +=============================================================================== +Reading MTRRs from the shell: + +% cat /proc/mtrr +reg00: base=0x00000000 ( 0MB), size= 128MB: write-back, count=1 +reg01: base=0x08000000 ( 128MB), size= 64MB: write-back, count=1 +=============================================================================== +Creating MTRRs from the C-shell: +# echo "base=0xf8000000 size=0x400000 type=write-combining" >! /proc/mtrr +or if you use bash: +# echo "base=0xf8000000 size=0x400000 type=write-combining" >| /proc/mtrr + +And the result thereof: +% cat /proc/mtrr +reg00: base=0x00000000 ( 0MB), size= 128MB: write-back, count=1 +reg01: base=0x08000000 ( 128MB), size= 64MB: write-back, count=1 +reg02: base=0xf8000000 (3968MB), size= 4MB: write-combining, count=1 + +This is for video RAM at base address 0xf8000000 and size 4 megabytes. To +find out your base address, you need to look at the output of your X +server, which tells you where the linear framebuffer address is. A +typical line that you may get is: + +(--) S3: PCI: 968 rev 0, Linear FB @ 0xf8000000 + +Note that you should only use the value from the X server, as it may +move the framebuffer base address, so the only value you can trust is +that reported by the X server. + +To find out the size of your framebuffer (what, you don't actually +know?), the following line will tell you: + +(--) S3: videoram: 4096k + +That's 4 megabytes, which is 0x400000 bytes (in hexadecimal). +A patch is being written for XFree86 which will make this automatic: +in other words the X server will manipulate /proc/mtrr using the +ioctl() interface, so users won't have to do anything. If you use a +commercial X server, lobby your vendor to add support for MTRRs. +=============================================================================== +Creating overlapping MTRRs: + +%echo "base=0xfb000000 size=0x1000000 type=write-combining" >/proc/mtrr +%echo "base=0xfb000000 size=0x1000 type=uncachable" >/proc/mtrr + +And the results: cat /proc/mtrr +reg00: base=0x00000000 ( 0MB), size= 64MB: write-back, count=1 +reg01: base=0xfb000000 (4016MB), size= 16MB: write-combining, count=1 +reg02: base=0xfb000000 (4016MB), size= 4kB: uncachable, count=1 + +Some cards (especially Voodoo Graphics boards) need this 4 kB area +excluded from the beginning of the region because it is used for +registers. + +NOTE: You can only create type=uncachable region, if the first +region that you created is type=write-combining. +=============================================================================== +Removing MTRRs from the C-shell: +% echo "disable=2" >! /proc/mtrr +or using bash: +% echo "disable=2" >| /proc/mtrr +=============================================================================== +Reading MTRRs from a C program using ioctl()'s: + +/* mtrr-show.c + + Source file for mtrr-show (example program to show MTRRs using ioctl()'s) + + Copyright (C) 1997-1998 Richard Gooch + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + + Richard Gooch may be reached by email at rgooch@atnf.csiro.au + The postal address is: + Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia. +*/ + +/* + This program will use an ioctl() on /proc/mtrr to show the current MTRR + settings. This is an alternative to reading /proc/mtrr. + + + Written by Richard Gooch 17-DEC-1997 + + Last updated by Richard Gooch 2-MAY-1998 + + +*/ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define TRUE 1 +#define FALSE 0 +#define ERRSTRING strerror (errno) + +static char *mtrr_strings[MTRR_NUM_TYPES] = +{ + "uncachable", /* 0 */ + "write-combining", /* 1 */ + "?", /* 2 */ + "?", /* 3 */ + "write-through", /* 4 */ + "write-protect", /* 5 */ + "write-back", /* 6 */ +}; + +int main () +{ + int fd; + struct mtrr_gentry gentry; + + if ( ( fd = open ("/proc/mtrr", O_RDONLY, 0) ) == -1 ) + { + if (errno == ENOENT) + { + fputs ("/proc/mtrr not found: not supported or you don't have a PPro?\n", + stderr); + exit (1); + } + fprintf (stderr, "Error opening /proc/mtrr\t%s\n", ERRSTRING); + exit (2); + } + for (gentry.regnum = 0; ioctl (fd, MTRRIOC_GET_ENTRY, &gentry) == 0; + ++gentry.regnum) + { + if (gentry.size < 1) + { + fprintf (stderr, "Register: %u disabled\n", gentry.regnum); + continue; + } + fprintf (stderr, "Register: %u base: 0x%lx size: 0x%lx type: %s\n", + gentry.regnum, gentry.base, gentry.size, + mtrr_strings[gentry.type]); + } + if (errno == EINVAL) exit (0); + fprintf (stderr, "Error doing ioctl(2) on /dev/mtrr\t%s\n", ERRSTRING); + exit (3); +} /* End Function main */ +=============================================================================== +Creating MTRRs from a C programme using ioctl()'s: + +/* mtrr-add.c + + Source file for mtrr-add (example programme to add an MTRRs using ioctl()) + + Copyright (C) 1997-1998 Richard Gooch + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + + Richard Gooch may be reached by email at rgooch@atnf.csiro.au + The postal address is: + Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia. +*/ + +/* + This programme will use an ioctl() on /proc/mtrr to add an entry. The first + available mtrr is used. This is an alternative to writing /proc/mtrr. + + + Written by Richard Gooch 17-DEC-1997 + + Last updated by Richard Gooch 2-MAY-1998 + + +*/ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define TRUE 1 +#define FALSE 0 +#define ERRSTRING strerror (errno) + +static char *mtrr_strings[MTRR_NUM_TYPES] = +{ + "uncachable", /* 0 */ + "write-combining", /* 1 */ + "?", /* 2 */ + "?", /* 3 */ + "write-through", /* 4 */ + "write-protect", /* 5 */ + "write-back", /* 6 */ +}; + +int main (int argc, char **argv) +{ + int fd; + struct mtrr_sentry sentry; + + if (argc != 4) + { + fprintf (stderr, "Usage:\tmtrr-add base size type\n"); + exit (1); + } + sentry.base = strtoul (argv[1], NULL, 0); + sentry.size = strtoul (argv[2], NULL, 0); + for (sentry.type = 0; sentry.type < MTRR_NUM_TYPES; ++sentry.type) + { + if (strcmp (argv[3], mtrr_strings[sentry.type]) == 0) break; + } + if (sentry.type >= MTRR_NUM_TYPES) + { + fprintf (stderr, "Illegal type: \"%s\"\n", argv[3]); + exit (2); + } + if ( ( fd = open ("/proc/mtrr", O_WRONLY, 0) ) == -1 ) + { + if (errno == ENOENT) + { + fputs ("/proc/mtrr not found: not supported or you don't have a PPro?\n", + stderr); + exit (3); + } + fprintf (stderr, "Error opening /proc/mtrr\t%s\n", ERRSTRING); + exit (4); + } + if (ioctl (fd, MTRRIOC_ADD_ENTRY, &sentry) == -1) + { + fprintf (stderr, "Error doing ioctl(2) on /dev/mtrr\t%s\n", ERRSTRING); + exit (5); + } + fprintf (stderr, "Sleeping for 5 seconds so you can see the new entry\n"); + sleep (5); + close (fd); + fputs ("I've just closed /proc/mtrr so now the new entry should be gone\n", + stderr); +} /* End Function main */ +=============================================================================== diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 03980cb..06f9354 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1131,7 +1131,7 @@ config MTRR You can safely say Y even if your machine doesn't have MTRRs, you'll just add about 9 KB to your kernel. - See for more information. + See for more information. config MTRR_SANITIZER bool -- cgit v0.10.2 From d974ae379a2fbe8948f01eabbc6b19c0a80f09bc Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 24 Jul 2008 16:27:46 -0700 Subject: generic, memparse(): constify argument memparse()'s first argument can be const, so it should be. Signed-off-by: Jeremy Fitzhardinge Cc: Andrew Morton Signed-off-by: Ingo Molnar diff --git a/include/linux/kernel.h b/include/linux/kernel.h index fdbbf72..7889c2f 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -176,7 +176,7 @@ extern int vsscanf(const char *, const char *, va_list) extern int get_option(char **str, int *pint); extern char *get_options(const char *str, int nints, int *ints); -extern unsigned long long memparse(char *ptr, char **retptr); +extern unsigned long long memparse(const char *ptr, char **retptr); extern int core_kernel_text(unsigned long addr); extern int __kernel_text_address(unsigned long addr); diff --git a/lib/cmdline.c b/lib/cmdline.c index 5ba8a94..f5f3ad8 100644 --- a/lib/cmdline.c +++ b/lib/cmdline.c @@ -126,7 +126,7 @@ char *get_options(const char *str, int nints, int *ints) * megabyte, or one gigabyte, respectively. */ -unsigned long long memparse(char *ptr, char **retptr) +unsigned long long memparse(const char *ptr, char **retptr) { char *endptr; /* local pointer to end of parsed string */ -- cgit v0.10.2 From 167e6cf62979df03513898966f9227847d192327 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 24 Jul 2008 16:27:52 -0700 Subject: xen-balloon: fix up sysfs issues 1. Set the class so it doesn't clash with the normal memory class 2. Fix up the sysfs show functions to match the new prototype 3. Clean up use of memparse Signed-off-by: Jeremy Fitzhardinge Cc: "viets@work.de" Cc: Andi Kleen Signed-off-by: Ingo Molnar diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index d4427cb..cdec2d8 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -60,7 +60,7 @@ #define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10)) -#define BALLOON_CLASS_NAME "memory" +#define BALLOON_CLASS_NAME "xen_memory" struct balloon_stats { /* We aim for 'current allocation' == 'target allocation'. */ @@ -588,12 +588,13 @@ static void balloon_release_driver_page(struct page *page) } -#define BALLOON_SHOW(name, format, args...) \ - static ssize_t show_##name(struct sys_device *dev, \ - char *buf) \ - { \ - return sprintf(buf, format, ##args); \ - } \ +#define BALLOON_SHOW(name, format, args...) \ + static ssize_t show_##name(struct sys_device *dev, \ + struct sysdev_attribute *attr, \ + char *buf) \ + { \ + return sprintf(buf, format, ##args); \ + } \ static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL) BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(balloon_stats.current_pages)); @@ -604,7 +605,8 @@ BALLOON_SHOW(hard_limit_kb, (balloon_stats.hard_limit!=~0UL) ? PAGES2KB(balloon_stats.hard_limit) : 0); BALLOON_SHOW(driver_kb, "%lu\n", PAGES2KB(balloon_stats.driver_pages)); -static ssize_t show_target_kb(struct sys_device *dev, char *buf) +static ssize_t show_target_kb(struct sys_device *dev, struct sysdev_attribute *attr, + char *buf) { return sprintf(buf, "%lu\n", PAGES2KB(balloon_stats.target_pages)); } @@ -614,19 +616,14 @@ static ssize_t store_target_kb(struct sys_device *dev, const char *buf, size_t count) { - char memstring[64], *endchar; + char *endchar; unsigned long long target_bytes; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (count <= 1) - return -EBADMSG; /* runt */ - if (count > sizeof(memstring)) - return -EFBIG; /* too long */ - strcpy(memstring, buf); + target_bytes = memparse(buf, &endchar); - target_bytes = memparse(memstring, &endchar); balloon_set_new_target(target_bytes >> PAGE_SHIFT); return count; -- cgit v0.10.2 From fde28e8f49ed86e771667a3fe81fcc25c93ede8e Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 24 Jul 2008 16:28:00 -0700 Subject: xen-balloon: clean up unused functions Remove some unused functions: balloon_update_driver_allowance balloon_release_driver_page only used on the (obsolete, removed) flip path in netfront alloc_empty_pages_and_pagevec free_empty_pages_and_pagevec only used in backend drivers; can be reintroduced when needed Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index cdec2d8..fff987b 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -226,9 +226,8 @@ static int increase_reservation(unsigned long nr_pages) } set_xen_guest_handle(reservation.extent_start, frame_list); - reservation.nr_extents = nr_pages; - rc = HYPERVISOR_memory_op( - XENMEM_populate_physmap, &reservation); + reservation.nr_extents = nr_pages; + rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation); if (rc < nr_pages) { if (rc > 0) { int ret; @@ -236,7 +235,7 @@ static int increase_reservation(unsigned long nr_pages) /* We hit the Xen hard limit: reprobe. */ reservation.nr_extents = rc; ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, - &reservation); + &reservation); BUG_ON(ret != rc); } if (rc >= 0) @@ -464,130 +463,6 @@ static void balloon_exit(void) module_exit(balloon_exit); -static void balloon_update_driver_allowance(long delta) -{ - unsigned long flags; - - spin_lock_irqsave(&balloon_lock, flags); - balloon_stats.driver_pages += delta; - spin_unlock_irqrestore(&balloon_lock, flags); -} - -static int dealloc_pte_fn( - pte_t *pte, struct page *pmd_page, unsigned long addr, void *data) -{ - unsigned long mfn = pte_mfn(*pte); - int ret; - struct xen_memory_reservation reservation = { - .nr_extents = 1, - .extent_order = 0, - .domid = DOMID_SELF - }; - set_xen_guest_handle(reservation.extent_start, &mfn); - set_pte_at(&init_mm, addr, pte, __pte_ma(0ull)); - set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY); - ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); - BUG_ON(ret != 1); - return 0; -} - -static struct page **alloc_empty_pages_and_pagevec(int nr_pages) -{ - unsigned long vaddr, flags; - struct page *page, **pagevec; - int i, ret; - - pagevec = kmalloc(sizeof(page) * nr_pages, GFP_KERNEL); - if (pagevec == NULL) - return NULL; - - for (i = 0; i < nr_pages; i++) { - page = pagevec[i] = alloc_page(GFP_KERNEL); - if (page == NULL) - goto err; - - vaddr = (unsigned long)page_address(page); - - scrub_page(page); - - spin_lock_irqsave(&balloon_lock, flags); - - if (xen_feature(XENFEAT_auto_translated_physmap)) { - unsigned long gmfn = page_to_pfn(page); - struct xen_memory_reservation reservation = { - .nr_extents = 1, - .extent_order = 0, - .domid = DOMID_SELF - }; - set_xen_guest_handle(reservation.extent_start, &gmfn); - ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, - &reservation); - if (ret == 1) - ret = 0; /* success */ - } else { - ret = apply_to_page_range(&init_mm, vaddr, PAGE_SIZE, - dealloc_pte_fn, NULL); - } - - if (ret != 0) { - spin_unlock_irqrestore(&balloon_lock, flags); - __free_page(page); - goto err; - } - - totalram_pages = --balloon_stats.current_pages; - - spin_unlock_irqrestore(&balloon_lock, flags); - } - - out: - schedule_work(&balloon_worker); - flush_tlb_all(); - return pagevec; - - err: - spin_lock_irqsave(&balloon_lock, flags); - while (--i >= 0) - balloon_append(pagevec[i]); - spin_unlock_irqrestore(&balloon_lock, flags); - kfree(pagevec); - pagevec = NULL; - goto out; -} - -static void free_empty_pages_and_pagevec(struct page **pagevec, int nr_pages) -{ - unsigned long flags; - int i; - - if (pagevec == NULL) - return; - - spin_lock_irqsave(&balloon_lock, flags); - for (i = 0; i < nr_pages; i++) { - BUG_ON(page_count(pagevec[i]) != 1); - balloon_append(pagevec[i]); - } - spin_unlock_irqrestore(&balloon_lock, flags); - - kfree(pagevec); - - schedule_work(&balloon_worker); -} - -static void balloon_release_driver_page(struct page *page) -{ - unsigned long flags; - - spin_lock_irqsave(&balloon_lock, flags); - balloon_append(page); - balloon_stats.driver_pages--; - spin_unlock_irqrestore(&balloon_lock, flags); - - schedule_work(&balloon_worker); -} - - #define BALLOON_SHOW(name, format, args...) \ static ssize_t show_##name(struct sys_device *dev, \ struct sysdev_attribute *attr, \ @@ -691,20 +566,4 @@ static int register_balloon(struct sys_device *sysdev) return error; } -static void unregister_balloon(struct sys_device *sysdev) -{ - int i; - - sysfs_remove_group(&sysdev->kobj, &balloon_info_group); - for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++) - sysdev_remove_file(sysdev, balloon_attrs[i]); - sysdev_unregister(sysdev); - sysdev_class_unregister(&balloon_sysdev_class); -} - -static void balloon_sysfs_exit(void) -{ - unregister_balloon(&balloon_sysdev); -} - MODULE_LICENSE("GPL"); -- cgit v0.10.2 From 8cb22bcb1f3ef70d4d48092e9b057175ad9ec78d Mon Sep 17 00:00:00 2001 From: Mark Langsdorf Date: Fri, 18 Jul 2008 16:03:52 -0500 Subject: x86: L3 cache index disable for 2.6.26 New versions of AMD processors have support to disable parts of their L3 caches if too many MCEs are generated by the L3 cache. This patch provides a /sysfs interface under the cache hierarchy to display which caches indices are disabled (if any) and to monitoring applications to disable a cache index. This patch does not set an automatic policy to disable the L3 cache. Policy decisions would need to be made by a RAS handler. This patch merely makes it easier to see what indices are currently disabled. Signed-off-by: Mark Langsdorf Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index ff517f0b..d6ea50e 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -16,6 +16,7 @@ #include #include +#include #define LVL_1_INST 1 #define LVL_1_DATA 2 @@ -130,6 +131,7 @@ struct _cpuid4_info { union _cpuid4_leaf_ebx ebx; union _cpuid4_leaf_ecx ecx; unsigned long size; + unsigned long can_disable; cpumask_t shared_cpu_map; /* future?: only cpus/node is needed */ }; @@ -251,6 +253,13 @@ static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, (ebx->split.ways_of_associativity + 1) - 1; } +static void __cpuinit amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf) +{ + if (index < 3) + return; + this_leaf->can_disable = 1; +} + static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) { union _cpuid4_leaf_eax eax; @@ -258,9 +267,12 @@ static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_le union _cpuid4_leaf_ecx ecx; unsigned edx; - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { amd_cpuid4(index, &eax, &ebx, &ecx); - else + if (boot_cpu_data.x86 >= 0x10) + amd_check_l3_disable(index, this_leaf); + + } else cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); if (eax.split.type == CACHE_TYPE_NULL) return -EIO; /* better error ? */ @@ -637,6 +649,61 @@ static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) { } } +#define to_object(k) container_of(k, struct _index_kobject, kobj) +#define to_attr(a) container_of(a, struct _cache_attr, attr) + +static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf) +{ + struct pci_dev *dev; + if (this_leaf->can_disable) { + int i; + ssize_t ret = 0; + int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); + dev = k8_northbridges[node]; + + for (i = 0; i < 2; i++) { + unsigned int reg; + pci_read_config_dword(dev, 0x1BC + i * 4, ®); + ret += sprintf(buf, "%sEntry: %d\n", buf, i); + ret += sprintf(buf, "%sReads: %s\tNew Entries: %s\n", + buf, + reg & 0x80000000 ? "Disabled" : "Allowed", + reg & 0x40000000 ? "Disabled" : "Allowed"); + ret += sprintf(buf, "%sSubCache: %x\tIndex: %x\n", buf, + (reg & 0x30000) >> 16, reg & 0xfff); + + } + return ret; + } + return sprintf(buf, "Feature not enabled\n"); +} + +static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf, size_t count) +{ + struct pci_dev *dev; + if (this_leaf->can_disable) { + /* write the MSR value */ + unsigned int ret; + unsigned int index, val; + int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); + dev = k8_northbridges[node]; + + if (strlen(buf) > 15) + return -EINVAL; + ret = sscanf(buf, "%x %x", &index, &val); + if (ret != 2) + return -EINVAL; + if (index > 1) + return -EINVAL; + val |= 0xc0000000; + pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000); + wbinvd(); + pci_write_config_dword(dev, 0x1BC + index * 4, val); + return 1; + } + return 0; +} + struct _cache_attr { struct attribute attr; ssize_t (*show)(struct _cpuid4_info *, char *); @@ -657,6 +724,8 @@ define_one_ro(size); define_one_ro(shared_cpu_map); define_one_ro(shared_cpu_list); +static struct _cache_attr cache_disable = __ATTR(cache_disable, 0644, show_cache_disable, store_cache_disable); + static struct attribute * default_attrs[] = { &type.attr, &level.attr, @@ -667,12 +736,10 @@ static struct attribute * default_attrs[] = { &size.attr, &shared_cpu_map.attr, &shared_cpu_list.attr, + &cache_disable.attr, NULL }; -#define to_object(k) container_of(k, struct _index_kobject, kobj) -#define to_attr(a) container_of(a, struct _cache_attr, attr) - static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf) { struct _cache_attr *fattr = to_attr(attr); @@ -689,7 +756,15 @@ static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf) static ssize_t store(struct kobject * kobj, struct attribute * attr, const char * buf, size_t count) { - return 0; + struct _cache_attr *fattr = to_attr(attr); + struct _index_kobject *this_leaf = to_object(kobj); + ssize_t ret; + + ret = fattr->store ? + fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index), + buf, count) : + 0; + return ret; } static struct sysfs_ops sysfs_ops = { -- cgit v0.10.2 From 7a4983bb5f94f6521aa3236fe5c035cf9bef543f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 21 Jul 2008 13:34:21 +0200 Subject: x86: L3 cache index disable for 2.6.26, cleanups No change in functionality. Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index d6ea50e..a61c9e3 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -253,14 +253,16 @@ static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, (ebx->split.ways_of_associativity + 1) - 1; } -static void __cpuinit amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf) +static void __cpuinit +amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf) { if (index < 3) return; this_leaf->can_disable = 1; } -static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) +static int +__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) { union _cpuid4_leaf_eax eax; union _cpuid4_leaf_ebx ebx; @@ -271,19 +273,20 @@ static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_le amd_cpuid4(index, &eax, &ebx, &ecx); if (boot_cpu_data.x86 >= 0x10) amd_check_l3_disable(index, this_leaf); - - } else - cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); + } else { + cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); + } + if (eax.split.type == CACHE_TYPE_NULL) return -EIO; /* better error ? */ this_leaf->eax = eax; this_leaf->ebx = ebx; this_leaf->ecx = ecx; - this_leaf->size = (ecx.split.number_of_sets + 1) * - (ebx.split.coherency_line_size + 1) * - (ebx.split.physical_line_partition + 1) * - (ebx.split.ways_of_associativity + 1); + this_leaf->size = (ecx.split.number_of_sets + 1) * + (ebx.split.coherency_line_size + 1) * + (ebx.split.physical_line_partition + 1) * + (ebx.split.ways_of_associativity + 1); return 0; } @@ -649,59 +652,63 @@ static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) { } } -#define to_object(k) container_of(k, struct _index_kobject, kobj) -#define to_attr(a) container_of(a, struct _cache_attr, attr) +#define to_object(k) container_of(k, struct _index_kobject, kobj) +#define to_attr(a) container_of(a, struct _cache_attr, attr) static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf) { - struct pci_dev *dev; - if (this_leaf->can_disable) { - int i; - ssize_t ret = 0; - int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); - dev = k8_northbridges[node]; - - for (i = 0; i < 2; i++) { - unsigned int reg; - pci_read_config_dword(dev, 0x1BC + i * 4, ®); - ret += sprintf(buf, "%sEntry: %d\n", buf, i); - ret += sprintf(buf, "%sReads: %s\tNew Entries: %s\n", - buf, - reg & 0x80000000 ? "Disabled" : "Allowed", - reg & 0x40000000 ? "Disabled" : "Allowed"); - ret += sprintf(buf, "%sSubCache: %x\tIndex: %x\n", buf, - (reg & 0x30000) >> 16, reg & 0xfff); + int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); + struct pci_dev *dev = k8_northbridges[node]; + ssize_t ret = 0; + int i; - } - return ret; + if (!this_leaf->can_disable) + return sprintf(buf, "Feature not enabled\n"); + + for (i = 0; i < 2; i++) { + unsigned int reg; + + pci_read_config_dword(dev, 0x1BC + i * 4, ®); + + ret += sprintf(buf, "%sEntry: %d\n", buf, i); + ret += sprintf(buf, "%sReads: %s\tNew Entries: %s\n", + buf, + reg & 0x80000000 ? "Disabled" : "Allowed", + reg & 0x40000000 ? "Disabled" : "Allowed"); + ret += sprintf(buf, "%sSubCache: %x\tIndex: %x\n", + buf, (reg & 0x30000) >> 16, reg & 0xfff); } - return sprintf(buf, "Feature not enabled\n"); + return ret; } -static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf, size_t count) +static ssize_t +store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf, + size_t count) { - struct pci_dev *dev; - if (this_leaf->can_disable) { - /* write the MSR value */ - unsigned int ret; - unsigned int index, val; - int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); - dev = k8_northbridges[node]; - - if (strlen(buf) > 15) - return -EINVAL; - ret = sscanf(buf, "%x %x", &index, &val); - if (ret != 2) - return -EINVAL; - if (index > 1) - return -EINVAL; - val |= 0xc0000000; - pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000); - wbinvd(); - pci_write_config_dword(dev, 0x1BC + index * 4, val); - return 1; - } - return 0; + int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); + struct pci_dev *dev = k8_northbridges[node]; + unsigned int ret, index, val; + + if (!this_leaf->can_disable) + return 0; + + /* write the MSR value */ + + if (strlen(buf) > 15) + return -EINVAL; + + ret = sscanf(buf, "%x %x", &index, &val); + if (ret != 2) + return -EINVAL; + if (index > 1) + return -EINVAL; + + val |= 0xc0000000; + pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000); + wbinvd(); + pci_write_config_dword(dev, 0x1BC + index * 4, val); + + return 1; } struct _cache_attr { -- cgit v0.10.2 From a24e8d36f5fc047dac9af6200322ed393f2e3175 Mon Sep 17 00:00:00 2001 From: Mark Langsdorf Date: Tue, 22 Jul 2008 13:06:02 -0500 Subject: x86: L3 cache index disable for 2.6.26 On Monday 21 July 2008, Ingo Molnar wrote: > > applied to tip/x86/cpu, thanks Mark. > > > > I've done some coding style fixes for the new functions you've > > introduced, see that commit below. > > -tip testing found the following build failure: > > arch/x86/kernel/built-in.o: In function `show_cache_disable': > intel_cacheinfo.c:(.text+0xbbf2): undefined reference to `k8_northbridges' > arch/x86/kernel/built-in.o: In function `store_cache_disable': > intel_cacheinfo.c:(.text+0xbd91): undefined reference to `k8_northbridges' > > please send a delta fix patch against the tip/x86/cpu branch: > > http://people.redhat.com/mingo/tip.git/README > > which has your patch plus the cleanup applied. delta fix patch follows. It removes the dependency on k8_northbridges. -Mark Langsdorf Operating System Research Center AMD Signed-off-by: Mark Langsdorf Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index a61c9e3..a0c6c6f 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -13,10 +13,10 @@ #include #include #include +#include #include #include -#include #define LVL_1_INST 1 #define LVL_1_DATA 2 @@ -135,6 +135,12 @@ struct _cpuid4_info { cpumask_t shared_cpu_map; /* future?: only cpus/node is needed */ }; +static struct pci_device_id k8_nb_id[] = { + { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) }, + {} +}; + unsigned short num_cache_leaves; /* AMD doesn't have CPUID4. Emulate it here to report the same @@ -655,16 +661,39 @@ static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) { #define to_object(k) container_of(k, struct _index_kobject, kobj) #define to_attr(a) container_of(a, struct _cache_attr, attr) +static struct pci_dev *get_k8_northbridge(int node) +{ + struct pci_dev *dev = NULL; + int i; + + for (i = 0; i <= node; i++) { + do { + dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev); + if (!dev) + break; + } while (!pci_match_id(&k8_nb_id[0], dev)); + if (!dev) + break; + } + return dev; +} + static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf) { int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); - struct pci_dev *dev = k8_northbridges[node]; + struct pci_dev *dev = NULL; ssize_t ret = 0; int i; if (!this_leaf->can_disable) return sprintf(buf, "Feature not enabled\n"); + dev = get_k8_northbridge(node); + if (!dev) { + printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n"); + return -EINVAL; + } + for (i = 0; i < 2; i++) { unsigned int reg; @@ -686,14 +715,12 @@ store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf, size_t count) { int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); - struct pci_dev *dev = k8_northbridges[node]; + struct pci_dev *dev = NULL; unsigned int ret, index, val; if (!this_leaf->can_disable) return 0; - /* write the MSR value */ - if (strlen(buf) > 15) return -EINVAL; @@ -704,6 +731,12 @@ store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf, return -EINVAL; val |= 0xc0000000; + dev = get_k8_northbridge(node); + if (!dev) { + printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n"); + return -EINVAL; + } + pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000); wbinvd(); pci_write_config_dword(dev, 0x1BC + index * 4, val); -- cgit v0.10.2 From cdcf772ed163651cacac8098b4974aba7f9e1c73 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 28 Jul 2008 16:20:08 +0200 Subject: x86 l3 cache index disable for 2 6 26 fix Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index a0c6c6f..535d662 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -1,8 +1,8 @@ /* - * Routines to indentify caches on Intel CPU. + * Routines to indentify caches on Intel CPU. * - * Changes: - * Venkatesh Pallipadi : Adding cache identification through cpuid(4) + * Changes: + * Venkatesh Pallipadi : Adding cache identification through cpuid(4) * Ashok Raj : Work with CPU hotplug infrastructure. * Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD. */ @@ -136,9 +136,9 @@ struct _cpuid4_info { }; static struct pci_device_id k8_nb_id[] = { - { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) }, - {} + { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) }, + {} }; unsigned short num_cache_leaves; @@ -190,9 +190,10 @@ static unsigned short assocs[] __cpuinitdata = { static unsigned char levels[] __cpuinitdata = { 1, 1, 2, 3 }; static unsigned char types[] __cpuinitdata = { 1, 2, 3, 3 }; -static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, - union _cpuid4_leaf_ebx *ebx, - union _cpuid4_leaf_ecx *ecx) +static void __cpuinit +amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, + union _cpuid4_leaf_ebx *ebx, + union _cpuid4_leaf_ecx *ecx) { unsigned dummy; unsigned line_size, lines_per_tag, assoc, size_in_kb; @@ -264,7 +265,7 @@ amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf) { if (index < 3) return; - this_leaf->can_disable = 1; + this_leaf->can_disable = 1; } static int @@ -474,7 +475,7 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) /* pointer to _cpuid4_info array (for each cache leaf) */ static DEFINE_PER_CPU(struct _cpuid4_info *, cpuid4_info); -#define CPUID4_INFO_IDX(x, y) (&((per_cpu(cpuid4_info, x))[y])) +#define CPUID4_INFO_IDX(x, y) (&((per_cpu(cpuid4_info, x))[y])) #ifdef CONFIG_SMP static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) @@ -511,7 +512,7 @@ static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) this_leaf = CPUID4_INFO_IDX(cpu, index); for_each_cpu_mask(sibling, this_leaf->shared_cpu_map) { - sibling_leaf = CPUID4_INFO_IDX(sibling, index); + sibling_leaf = CPUID4_INFO_IDX(sibling, index); cpu_clear(cpu, sibling_leaf->shared_cpu_map); } } @@ -593,7 +594,7 @@ struct _index_kobject { /* pointer to array of kobjects for cpuX/cache/indexY */ static DEFINE_PER_CPU(struct _index_kobject *, index_kobject); -#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(index_kobject, x))[y])) +#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(index_kobject, x))[y])) #define show_one_plus(file_name, object, val) \ static ssize_t show_##file_name \ @@ -675,7 +676,7 @@ static struct pci_dev *get_k8_northbridge(int node) if (!dev) break; } - return dev; + return dev; } static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf) @@ -736,7 +737,7 @@ store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf, printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n"); return -EINVAL; } - + pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000); wbinvd(); pci_write_config_dword(dev, 0x1BC + index * 4, val); @@ -789,7 +790,7 @@ static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf) ret = fattr->show ? fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index), buf) : - 0; + 0; return ret; } @@ -800,9 +801,9 @@ static ssize_t store(struct kobject * kobj, struct attribute * attr, struct _index_kobject *this_leaf = to_object(kobj); ssize_t ret; - ret = fattr->store ? - fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index), - buf, count) : + ret = fattr->store ? + fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index), + buf, count) : 0; return ret; } -- cgit v0.10.2 From d89961e2dc87b6e30b8e3f60bd2af5cd92cf4643 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 24 Jul 2008 13:48:58 -0700 Subject: xen: suppress known wrmsrs In general, Xen doesn't support wrmsr from an unprivileged domain; it just ends up ignoring the instruction and printing a message on the console. Given that there are sets of MSRs we know the kernel will try to write to, but we don't care, just eat them in xen_write_msr to cut down on console noise. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index b011e4a..b795470 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -854,6 +854,19 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) ret = -EFAULT; break; #endif + + case MSR_STAR: + case MSR_CSTAR: + case MSR_LSTAR: + case MSR_SYSCALL_MASK: + case MSR_IA32_SYSENTER_CS: + case MSR_IA32_SYSENTER_ESP: + case MSR_IA32_SYSENTER_EIP: + /* Fast syscall setup is all done in hypercalls, so + these are all ignored. Stub them out here to stop + Xen console noise. */ + break; + default: ret = native_write_msr_safe(msr, low, high); } -- cgit v0.10.2 From 239bd83104ec6bcba90221d8b0973d2565142ef8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 28 Jul 2008 16:45:49 +0200 Subject: x86: L3 cache index disable for 2.6.26, fix #2 fix !PCI build failure: arch/x86/kernel/cpu/intel_cacheinfo.c: In function 'get_k8_northbridge': arch/x86/kernel/cpu/intel_cacheinfo.c:675: error: implicit declaration of function 'pci_match_id' Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index d763d24..1677b553 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -135,11 +135,13 @@ struct _cpuid4_info { cpumask_t shared_cpu_map; /* future?: only cpus/node is needed */ }; +#ifdef CONFIG_PCI static struct pci_device_id k8_nb_id[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) }, {} }; +#endif unsigned short num_cache_leaves; @@ -663,6 +665,7 @@ static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) { #define to_object(k) container_of(k, struct _index_kobject, kobj) #define to_attr(a) container_of(a, struct _cache_attr, attr) +#ifdef CONFIG_PCI static struct pci_dev *get_k8_northbridge(int node) { struct pci_dev *dev = NULL; @@ -679,6 +682,12 @@ static struct pci_dev *get_k8_northbridge(int node) } return dev; } +#else +static struct pci_dev *get_k8_northbridge(int node) +{ + return NULL; +} +#endif static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf) { -- cgit v0.10.2 From e7f5b309c9bd6142f395c4a36123ebac4bcdc1b0 Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Mon, 28 Jul 2008 18:44:11 +0200 Subject: x86: AMD microcode patch loading support v2 Add an entry to the MAINTAINERS file for AMD CPU microcode patch loading support. Signed-off-by: Peter Oruba Cc: Tigran Aivazian Signed-off-by: Ingo Molnar diff --git a/MAINTAINERS b/MAINTAINERS index 03c5d6cc..28f867d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -384,6 +384,11 @@ M: joerg.roedel@amd.com L: iommu@lists.linux-foundation.org S: Supported +AMD MICROCODE UPDATE SUPPORT +P: Peter Oruba +M: peter.oruba@amd.com +S: Supported + AMS (Apple Motion Sensor) DRIVER P: Stelian Pop M: stelian@popies.net -- cgit v0.10.2 From 9a56a0f80b52cb41c5e0add47c7ce0bb2ef25eb0 Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Mon, 28 Jul 2008 18:44:13 +0200 Subject: x86: moved Intel microcode patch loader declarations to seperate header file Intel specific microcode declarations have been moved to a seperate header file. There are no code changes to the code itself and no side effects to other parts. Signed-off-by: Peter Oruba Cc: Tigran Aivazian Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c index 6994c75..0d654bd 100644 --- a/arch/x86/kernel/microcode.c +++ b/arch/x86/kernel/microcode.c @@ -93,6 +93,7 @@ #include #include #include +#include MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver"); MODULE_AUTHOR("Tigran Aivazian "); diff --git a/include/asm-x86/microcode.h b/include/asm-x86/microcode.h new file mode 100644 index 0000000..5a05568 --- /dev/null +++ b/include/asm-x86/microcode.h @@ -0,0 +1,34 @@ +struct microcode_header { + unsigned int hdrver; + unsigned int rev; + unsigned int date; + unsigned int sig; + unsigned int cksum; + unsigned int ldrver; + unsigned int pf; + unsigned int datasize; + unsigned int totalsize; + unsigned int reserved[3]; +}; + +struct microcode { + struct microcode_header hdr; + unsigned int bits[0]; +}; + +typedef struct microcode microcode_t; +typedef struct microcode_header microcode_header_t; + +/* microcode format is extended from prescott processors */ +struct extended_signature { + unsigned int sig; + unsigned int pf; + unsigned int cksum; +}; + +struct extended_sigtable { + unsigned int count; + unsigned int cksum; + unsigned int reserved[3]; + struct extended_signature sigs[0]; +}; diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h index 5f58da4..58a76f6 100644 --- a/include/asm-x86/processor.h +++ b/include/asm-x86/processor.h @@ -561,41 +561,6 @@ static inline void clear_in_cr4(unsigned long mask) write_cr4(cr4); } -struct microcode_header { - unsigned int hdrver; - unsigned int rev; - unsigned int date; - unsigned int sig; - unsigned int cksum; - unsigned int ldrver; - unsigned int pf; - unsigned int datasize; - unsigned int totalsize; - unsigned int reserved[3]; -}; - -struct microcode { - struct microcode_header hdr; - unsigned int bits[0]; -}; - -typedef struct microcode microcode_t; -typedef struct microcode_header microcode_header_t; - -/* microcode format is extended from prescott processors */ -struct extended_signature { - unsigned int sig; - unsigned int pf; - unsigned int cksum; -}; - -struct extended_sigtable { - unsigned int count; - unsigned int cksum; - unsigned int reserved[3]; - struct extended_signature sigs[0]; -}; - typedef struct { unsigned long seg; } mm_segment_t; -- cgit v0.10.2 From 8e61028dfdc6b8ca996abfe8f9baef6792ea2904 Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Mon, 28 Jul 2008 18:44:14 +0200 Subject: x86: typedef removal Removed typedefs. No functional changes to the code. Signed-off-by: Peter Oruba Cc: Tigran Aivazian Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c index 0d654bd..74e6a77 100644 --- a/arch/x86/kernel/microcode.c +++ b/arch/x86/kernel/microcode.c @@ -102,17 +102,17 @@ MODULE_LICENSE("GPL"); #define MICROCODE_VERSION "1.14a" #define DEFAULT_UCODE_DATASIZE (2000) /* 2000 bytes */ -#define MC_HEADER_SIZE (sizeof (microcode_header_t)) /* 48 bytes */ +#define MC_HEADER_SIZE (sizeof (struct microcode_header)) /* 48 bytes */ #define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) /* 2048 bytes */ #define EXT_HEADER_SIZE (sizeof (struct extended_sigtable)) /* 20 bytes */ #define EXT_SIGNATURE_SIZE (sizeof (struct extended_signature)) /* 12 bytes */ #define DWSIZE (sizeof (u32)) #define get_totalsize(mc) \ - (((microcode_t *)mc)->hdr.totalsize ? \ - ((microcode_t *)mc)->hdr.totalsize : DEFAULT_UCODE_TOTALSIZE) + (((struct microcode *)mc)->hdr.totalsize ? \ + ((struct microcode *)mc)->hdr.totalsize : DEFAULT_UCODE_TOTALSIZE) #define get_datasize(mc) \ - (((microcode_t *)mc)->hdr.datasize ? \ - ((microcode_t *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE) + (((struct microcode *)mc)->hdr.datasize ? \ + ((struct microcode *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE) #define sigmatch(s1, s2, p1, p2) \ (((s1) == (s2)) && (((p1) & (p2)) || (((p1) == 0) && ((p2) == 0)))) @@ -130,7 +130,7 @@ static struct ucode_cpu_info { unsigned int sig; unsigned int pf; unsigned int rev; - microcode_t *mc; + struct microcode *mc; } ucode_cpu_info[NR_CPUS]; static void collect_cpu_info(int cpu_num) @@ -171,7 +171,7 @@ static void collect_cpu_info(int cpu_num) } static inline int microcode_update_match(int cpu_num, - microcode_header_t *mc_header, int sig, int pf) + struct microcode_header *mc_header, int sig, int pf) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; @@ -183,7 +183,7 @@ static inline int microcode_update_match(int cpu_num, static int microcode_sanity_check(void *mc) { - microcode_header_t *mc_header = mc; + struct microcode_header *mc_header = mc; struct extended_sigtable *ext_header = NULL; struct extended_signature *ext_sig; unsigned long total_size, data_size, ext_table_size; @@ -268,7 +268,7 @@ static int microcode_sanity_check(void *mc) static int get_maching_microcode(void *mc, int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - microcode_header_t *mc_header = mc; + struct microcode_header *mc_header = mc; struct extended_sigtable *ext_header; unsigned long total_size = get_totalsize(mc_header); int ext_sigcount, i; @@ -355,7 +355,7 @@ static unsigned int user_buffer_size; /* it's size */ static long get_next_ucode(void **mc, long offset) { - microcode_header_t mc_header; + struct microcode_header mc_header; unsigned long total_size; /* No more data */ @@ -497,13 +497,13 @@ MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); static long get_next_ucode_from_buffer(void **mc, const u8 *buf, unsigned long size, long offset) { - microcode_header_t *mc_header; + struct microcode_header *mc_header; unsigned long total_size; /* No more data */ if (offset >= size) return 0; - mc_header = (microcode_header_t *)(buf + offset); + mc_header = (struct microcode_header *)(buf + offset); total_size = get_totalsize(mc_header); if (offset + total_size > size) { diff --git a/include/asm-x86/microcode.h b/include/asm-x86/microcode.h index 5a05568..1519ef0 100644 --- a/include/asm-x86/microcode.h +++ b/include/asm-x86/microcode.h @@ -16,9 +16,6 @@ struct microcode { unsigned int bits[0]; }; -typedef struct microcode microcode_t; -typedef struct microcode_header microcode_header_t; - /* microcode format is extended from prescott processors */ struct extended_signature { unsigned int sig; -- cgit v0.10.2 From c3b71bcec0380836caac9b524fa1585b469b7456 Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Mon, 28 Jul 2008 18:44:15 +0200 Subject: x86: move per CPU microcode structure declaration to header file This structure will be later used by other modules as well and needs therfore to be moved out to a header file. Signed-off-by: Peter Oruba Cc: Tigran Aivazian Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c index 74e6a77..4e7b2f6 100644 --- a/arch/x86/kernel/microcode.c +++ b/arch/x86/kernel/microcode.c @@ -125,13 +125,7 @@ static DEFINE_SPINLOCK(microcode_update_lock); /* no concurrent ->write()s are allowed on /dev/cpu/microcode */ static DEFINE_MUTEX(microcode_mutex); -static struct ucode_cpu_info { - int valid; - unsigned int sig; - unsigned int pf; - unsigned int rev; - struct microcode *mc; -} ucode_cpu_info[NR_CPUS]; +static struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; static void collect_cpu_info(int cpu_num) { diff --git a/include/asm-x86/microcode.h b/include/asm-x86/microcode.h index 1519ef0..d34a1fc 100644 --- a/include/asm-x86/microcode.h +++ b/include/asm-x86/microcode.h @@ -29,3 +29,11 @@ struct extended_sigtable { unsigned int reserved[3]; struct extended_signature sigs[0]; }; + +struct ucode_cpu_info { + int valid; + unsigned int sig; + unsigned int pf; + unsigned int rev; + struct microcode *mc; +}; -- cgit v0.10.2 From 1abae31096007cc993f67ae2576fe8f812270ad6 Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Mon, 28 Jul 2008 18:44:16 +0200 Subject: x86: move microcode.c to microcode_intel.c Signed-off-by: Peter Oruba Cc: Tigran Aivazian Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 3db651f..a9be2a0 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -51,7 +51,7 @@ obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o obj-$(CONFIG_MCA) += mca_32.o obj-$(CONFIG_X86_MSR) += msr.o obj-$(CONFIG_X86_CPUID) += cpuid.o -obj-$(CONFIG_MICROCODE) += microcode.o +obj-$(CONFIG_MICROCODE) += microcode_intel.o obj-$(CONFIG_PCI) += early-quirks.o apm-y := apm_32.o obj-$(CONFIG_APM) += apm.o diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c deleted file mode 100644 index 4e7b2f6..0000000 --- a/arch/x86/kernel/microcode.c +++ /dev/null @@ -1,855 +0,0 @@ -/* - * Intel CPU Microcode Update Driver for Linux - * - * Copyright (C) 2000-2006 Tigran Aivazian - * 2006 Shaohua Li - * - * This driver allows to upgrade microcode on Intel processors - * belonging to IA-32 family - PentiumPro, Pentium II, - * Pentium III, Xeon, Pentium 4, etc. - * - * Reference: Section 8.11 of Volume 3a, IA-32 Intel? Architecture - * Software Developer's Manual - * Order Number 253668 or free download from: - * - * http://developer.intel.com/design/pentium4/manuals/253668.htm - * - * For more information, go to http://www.urbanmyth.org/microcode - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * 1.0 16 Feb 2000, Tigran Aivazian - * Initial release. - * 1.01 18 Feb 2000, Tigran Aivazian - * Added read() support + cleanups. - * 1.02 21 Feb 2000, Tigran Aivazian - * Added 'device trimming' support. open(O_WRONLY) zeroes - * and frees the saved copy of applied microcode. - * 1.03 29 Feb 2000, Tigran Aivazian - * Made to use devfs (/dev/cpu/microcode) + cleanups. - * 1.04 06 Jun 2000, Simon Trimmer - * Added misc device support (now uses both devfs and misc). - * Added MICROCODE_IOCFREE ioctl to clear memory. - * 1.05 09 Jun 2000, Simon Trimmer - * Messages for error cases (non Intel & no suitable microcode). - * 1.06 03 Aug 2000, Tigran Aivazian - * Removed ->release(). Removed exclusive open and status bitmap. - * Added microcode_rwsem to serialize read()/write()/ioctl(). - * Removed global kernel lock usage. - * 1.07 07 Sep 2000, Tigran Aivazian - * Write 0 to 0x8B msr and then cpuid before reading revision, - * so that it works even if there were no update done by the - * BIOS. Otherwise, reading from 0x8B gives junk (which happened - * to be 0 on my machine which is why it worked even when I - * disabled update by the BIOS) - * Thanks to Eric W. Biederman for the fix. - * 1.08 11 Dec 2000, Richard Schaal and - * Tigran Aivazian - * Intel Pentium 4 processor support and bugfixes. - * 1.09 30 Oct 2001, Tigran Aivazian - * Bugfix for HT (Hyper-Threading) enabled processors - * whereby processor resources are shared by all logical processors - * in a single CPU package. - * 1.10 28 Feb 2002 Asit K Mallick and - * Tigran Aivazian , - * Serialize updates as required on HT processors due to speculative - * nature of implementation. - * 1.11 22 Mar 2002 Tigran Aivazian - * Fix the panic when writing zero-length microcode chunk. - * 1.12 29 Sep 2003 Nitin Kamble , - * Jun Nakajima - * Support for the microcode updates in the new format. - * 1.13 10 Oct 2003 Tigran Aivazian - * Removed ->read() method and obsoleted MICROCODE_IOCFREE ioctl - * because we no longer hold a copy of applied microcode - * in kernel memory. - * 1.14 25 Jun 2004 Tigran Aivazian - * Fix sigmatch() macro to handle old CPUs with pf == 0. - * Thanks to Stuart Swales for pointing out this bug. - */ - -//#define DEBUG /* pr_debug */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver"); -MODULE_AUTHOR("Tigran Aivazian "); -MODULE_LICENSE("GPL"); - -#define MICROCODE_VERSION "1.14a" - -#define DEFAULT_UCODE_DATASIZE (2000) /* 2000 bytes */ -#define MC_HEADER_SIZE (sizeof (struct microcode_header)) /* 48 bytes */ -#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) /* 2048 bytes */ -#define EXT_HEADER_SIZE (sizeof (struct extended_sigtable)) /* 20 bytes */ -#define EXT_SIGNATURE_SIZE (sizeof (struct extended_signature)) /* 12 bytes */ -#define DWSIZE (sizeof (u32)) -#define get_totalsize(mc) \ - (((struct microcode *)mc)->hdr.totalsize ? \ - ((struct microcode *)mc)->hdr.totalsize : DEFAULT_UCODE_TOTALSIZE) -#define get_datasize(mc) \ - (((struct microcode *)mc)->hdr.datasize ? \ - ((struct microcode *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE) - -#define sigmatch(s1, s2, p1, p2) \ - (((s1) == (s2)) && (((p1) & (p2)) || (((p1) == 0) && ((p2) == 0)))) - -#define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE) - -/* serialize access to the physical write to MSR 0x79 */ -static DEFINE_SPINLOCK(microcode_update_lock); - -/* no concurrent ->write()s are allowed on /dev/cpu/microcode */ -static DEFINE_MUTEX(microcode_mutex); - -static struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; - -static void collect_cpu_info(int cpu_num) -{ - struct cpuinfo_x86 *c = &cpu_data(cpu_num); - struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; - unsigned int val[2]; - - /* We should bind the task to the CPU */ - BUG_ON(raw_smp_processor_id() != cpu_num); - uci->pf = uci->rev = 0; - uci->mc = NULL; - uci->valid = 1; - - if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || - cpu_has(c, X86_FEATURE_IA64)) { - printk(KERN_ERR "microcode: CPU%d not a capable Intel " - "processor\n", cpu_num); - uci->valid = 0; - return; - } - - uci->sig = cpuid_eax(0x00000001); - - if ((c->x86_model >= 5) || (c->x86 > 6)) { - /* get processor flags from MSR 0x17 */ - rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]); - uci->pf = 1 << ((val[1] >> 18) & 7); - } - - wrmsr(MSR_IA32_UCODE_REV, 0, 0); - /* see notes above for revision 1.07. Apparent chip bug */ - sync_core(); - /* get the current revision from MSR 0x8B */ - rdmsr(MSR_IA32_UCODE_REV, val[0], uci->rev); - pr_debug("microcode: collect_cpu_info : sig=0x%x, pf=0x%x, rev=0x%x\n", - uci->sig, uci->pf, uci->rev); -} - -static inline int microcode_update_match(int cpu_num, - struct microcode_header *mc_header, int sig, int pf) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; - - if (!sigmatch(sig, uci->sig, pf, uci->pf) - || mc_header->rev <= uci->rev) - return 0; - return 1; -} - -static int microcode_sanity_check(void *mc) -{ - struct microcode_header *mc_header = mc; - struct extended_sigtable *ext_header = NULL; - struct extended_signature *ext_sig; - unsigned long total_size, data_size, ext_table_size; - int sum, orig_sum, ext_sigcount = 0, i; - - total_size = get_totalsize(mc_header); - data_size = get_datasize(mc_header); - if (data_size + MC_HEADER_SIZE > total_size) { - printk(KERN_ERR "microcode: error! " - "Bad data size in microcode data file\n"); - return -EINVAL; - } - - if (mc_header->ldrver != 1 || mc_header->hdrver != 1) { - printk(KERN_ERR "microcode: error! " - "Unknown microcode update format\n"); - return -EINVAL; - } - ext_table_size = total_size - (MC_HEADER_SIZE + data_size); - if (ext_table_size) { - if ((ext_table_size < EXT_HEADER_SIZE) - || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) { - printk(KERN_ERR "microcode: error! " - "Small exttable size in microcode data file\n"); - return -EINVAL; - } - ext_header = mc + MC_HEADER_SIZE + data_size; - if (ext_table_size != exttable_size(ext_header)) { - printk(KERN_ERR "microcode: error! " - "Bad exttable size in microcode data file\n"); - return -EFAULT; - } - ext_sigcount = ext_header->count; - } - - /* check extended table checksum */ - if (ext_table_size) { - int ext_table_sum = 0; - int *ext_tablep = (int *)ext_header; - - i = ext_table_size / DWSIZE; - while (i--) - ext_table_sum += ext_tablep[i]; - if (ext_table_sum) { - printk(KERN_WARNING "microcode: aborting, " - "bad extended signature table checksum\n"); - return -EINVAL; - } - } - - /* calculate the checksum */ - orig_sum = 0; - i = (MC_HEADER_SIZE + data_size) / DWSIZE; - while (i--) - orig_sum += ((int *)mc)[i]; - if (orig_sum) { - printk(KERN_ERR "microcode: aborting, bad checksum\n"); - return -EINVAL; - } - if (!ext_table_size) - return 0; - /* check extended signature checksum */ - for (i = 0; i < ext_sigcount; i++) { - ext_sig = (void *)ext_header + EXT_HEADER_SIZE + - EXT_SIGNATURE_SIZE * i; - sum = orig_sum - - (mc_header->sig + mc_header->pf + mc_header->cksum) - + (ext_sig->sig + ext_sig->pf + ext_sig->cksum); - if (sum) { - printk(KERN_ERR "microcode: aborting, bad checksum\n"); - return -EINVAL; - } - } - return 0; -} - -/* - * return 0 - no update found - * return 1 - found update - * return < 0 - error - */ -static int get_maching_microcode(void *mc, int cpu) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - struct microcode_header *mc_header = mc; - struct extended_sigtable *ext_header; - unsigned long total_size = get_totalsize(mc_header); - int ext_sigcount, i; - struct extended_signature *ext_sig; - void *new_mc; - - if (microcode_update_match(cpu, mc_header, - mc_header->sig, mc_header->pf)) - goto find; - - if (total_size <= get_datasize(mc_header) + MC_HEADER_SIZE) - return 0; - - ext_header = mc + get_datasize(mc_header) + MC_HEADER_SIZE; - ext_sigcount = ext_header->count; - ext_sig = (void *)ext_header + EXT_HEADER_SIZE; - for (i = 0; i < ext_sigcount; i++) { - if (microcode_update_match(cpu, mc_header, - ext_sig->sig, ext_sig->pf)) - goto find; - ext_sig++; - } - return 0; -find: - pr_debug("microcode: CPU%d found a matching microcode update with" - " version 0x%x (current=0x%x)\n", cpu, mc_header->rev,uci->rev); - new_mc = vmalloc(total_size); - if (!new_mc) { - printk(KERN_ERR "microcode: error! Can not allocate memory\n"); - return -ENOMEM; - } - - /* free previous update file */ - vfree(uci->mc); - - memcpy(new_mc, mc, total_size); - uci->mc = new_mc; - return 1; -} - -static void apply_microcode(int cpu) -{ - unsigned long flags; - unsigned int val[2]; - int cpu_num = raw_smp_processor_id(); - struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; - - /* We should bind the task to the CPU */ - BUG_ON(cpu_num != cpu); - - if (uci->mc == NULL) - return; - - /* serialize access to the physical write to MSR 0x79 */ - spin_lock_irqsave(µcode_update_lock, flags); - - /* write microcode via MSR 0x79 */ - wrmsr(MSR_IA32_UCODE_WRITE, - (unsigned long) uci->mc->bits, - (unsigned long) uci->mc->bits >> 16 >> 16); - wrmsr(MSR_IA32_UCODE_REV, 0, 0); - - /* see notes above for revision 1.07. Apparent chip bug */ - sync_core(); - - /* get the current revision from MSR 0x8B */ - rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); - - spin_unlock_irqrestore(µcode_update_lock, flags); - if (val[1] != uci->mc->hdr.rev) { - printk(KERN_ERR "microcode: CPU%d update from revision " - "0x%x to 0x%x failed\n", cpu_num, uci->rev, val[1]); - return; - } - printk(KERN_INFO "microcode: CPU%d updated from revision " - "0x%x to 0x%x, date = %08x \n", - cpu_num, uci->rev, val[1], uci->mc->hdr.date); - uci->rev = val[1]; -} - -#ifdef CONFIG_MICROCODE_OLD_INTERFACE -static void __user *user_buffer; /* user area microcode data buffer */ -static unsigned int user_buffer_size; /* it's size */ - -static long get_next_ucode(void **mc, long offset) -{ - struct microcode_header mc_header; - unsigned long total_size; - - /* No more data */ - if (offset >= user_buffer_size) - return 0; - if (copy_from_user(&mc_header, user_buffer + offset, MC_HEADER_SIZE)) { - printk(KERN_ERR "microcode: error! Can not read user data\n"); - return -EFAULT; - } - total_size = get_totalsize(&mc_header); - if (offset + total_size > user_buffer_size) { - printk(KERN_ERR "microcode: error! Bad total size in microcode " - "data file\n"); - return -EINVAL; - } - *mc = vmalloc(total_size); - if (!*mc) - return -ENOMEM; - if (copy_from_user(*mc, user_buffer + offset, total_size)) { - printk(KERN_ERR "microcode: error! Can not read user data\n"); - vfree(*mc); - return -EFAULT; - } - return offset + total_size; -} - -static int do_microcode_update (void) -{ - long cursor = 0; - int error = 0; - void *new_mc = NULL; - int cpu; - cpumask_t old; - cpumask_of_cpu_ptr_declare(newmask); - - old = current->cpus_allowed; - - while ((cursor = get_next_ucode(&new_mc, cursor)) > 0) { - error = microcode_sanity_check(new_mc); - if (error) - goto out; - /* - * It's possible the data file has multiple matching ucode, - * lets keep searching till the latest version - */ - for_each_online_cpu(cpu) { - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - if (!uci->valid) - continue; - cpumask_of_cpu_ptr_next(newmask, cpu); - set_cpus_allowed_ptr(current, newmask); - error = get_maching_microcode(new_mc, cpu); - if (error < 0) - goto out; - if (error == 1) - apply_microcode(cpu); - } - vfree(new_mc); - } -out: - if (cursor > 0) - vfree(new_mc); - if (cursor < 0) - error = cursor; - set_cpus_allowed_ptr(current, &old); - return error; -} - -static int microcode_open (struct inode *unused1, struct file *unused2) -{ - cycle_kernel_lock(); - return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; -} - -static ssize_t microcode_write (struct file *file, const char __user *buf, size_t len, loff_t *ppos) -{ - ssize_t ret; - - if ((len >> PAGE_SHIFT) > num_physpages) { - printk(KERN_ERR "microcode: too much data (max %ld pages)\n", num_physpages); - return -EINVAL; - } - - get_online_cpus(); - mutex_lock(µcode_mutex); - - user_buffer = (void __user *) buf; - user_buffer_size = (int) len; - - ret = do_microcode_update(); - if (!ret) - ret = (ssize_t)len; - - mutex_unlock(µcode_mutex); - put_online_cpus(); - - return ret; -} - -static const struct file_operations microcode_fops = { - .owner = THIS_MODULE, - .write = microcode_write, - .open = microcode_open, -}; - -static struct miscdevice microcode_dev = { - .minor = MICROCODE_MINOR, - .name = "microcode", - .fops = µcode_fops, -}; - -static int __init microcode_dev_init (void) -{ - int error; - - error = misc_register(µcode_dev); - if (error) { - printk(KERN_ERR - "microcode: can't misc_register on minor=%d\n", - MICROCODE_MINOR); - return error; - } - - return 0; -} - -static void microcode_dev_exit (void) -{ - misc_deregister(µcode_dev); -} - -MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); -#else -#define microcode_dev_init() 0 -#define microcode_dev_exit() do { } while(0) -#endif - -static long get_next_ucode_from_buffer(void **mc, const u8 *buf, - unsigned long size, long offset) -{ - struct microcode_header *mc_header; - unsigned long total_size; - - /* No more data */ - if (offset >= size) - return 0; - mc_header = (struct microcode_header *)(buf + offset); - total_size = get_totalsize(mc_header); - - if (offset + total_size > size) { - printk(KERN_ERR "microcode: error! Bad data in microcode data file\n"); - return -EINVAL; - } - - *mc = vmalloc(total_size); - if (!*mc) { - printk(KERN_ERR "microcode: error! Can not allocate memory\n"); - return -ENOMEM; - } - memcpy(*mc, buf + offset, total_size); - return offset + total_size; -} - -/* fake device for request_firmware */ -static struct platform_device *microcode_pdev; - -static int cpu_request_microcode(int cpu) -{ - char name[30]; - struct cpuinfo_x86 *c = &cpu_data(cpu); - const struct firmware *firmware; - const u8 *buf; - unsigned long size; - long offset = 0; - int error; - void *mc; - - /* We should bind the task to the CPU */ - BUG_ON(cpu != raw_smp_processor_id()); - sprintf(name,"intel-ucode/%02x-%02x-%02x", - c->x86, c->x86_model, c->x86_mask); - error = request_firmware(&firmware, name, µcode_pdev->dev); - if (error) { - pr_debug("microcode: data file %s load failed\n", name); - return error; - } - buf = firmware->data; - size = firmware->size; - while ((offset = get_next_ucode_from_buffer(&mc, buf, size, offset)) - > 0) { - error = microcode_sanity_check(mc); - if (error) - break; - error = get_maching_microcode(mc, cpu); - if (error < 0) - break; - /* - * It's possible the data file has multiple matching ucode, - * lets keep searching till the latest version - */ - if (error == 1) { - apply_microcode(cpu); - error = 0; - } - vfree(mc); - } - if (offset > 0) - vfree(mc); - if (offset < 0) - error = offset; - release_firmware(firmware); - - return error; -} - -static int apply_microcode_check_cpu(int cpu) -{ - struct cpuinfo_x86 *c = &cpu_data(cpu); - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - cpumask_t old; - cpumask_of_cpu_ptr(newmask, cpu); - unsigned int val[2]; - int err = 0; - - /* Check if the microcode is available */ - if (!uci->mc) - return 0; - - old = current->cpus_allowed; - set_cpus_allowed_ptr(current, newmask); - - /* Check if the microcode we have in memory matches the CPU */ - if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || - cpu_has(c, X86_FEATURE_IA64) || uci->sig != cpuid_eax(0x00000001)) - err = -EINVAL; - - if (!err && ((c->x86_model >= 5) || (c->x86 > 6))) { - /* get processor flags from MSR 0x17 */ - rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]); - if (uci->pf != (1 << ((val[1] >> 18) & 7))) - err = -EINVAL; - } - - if (!err) { - wrmsr(MSR_IA32_UCODE_REV, 0, 0); - /* see notes above for revision 1.07. Apparent chip bug */ - sync_core(); - /* get the current revision from MSR 0x8B */ - rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); - if (uci->rev != val[1]) - err = -EINVAL; - } - - if (!err) - apply_microcode(cpu); - else - printk(KERN_ERR "microcode: Could not apply microcode to CPU%d:" - " sig=0x%x, pf=0x%x, rev=0x%x\n", - cpu, uci->sig, uci->pf, uci->rev); - - set_cpus_allowed_ptr(current, &old); - return err; -} - -static void microcode_init_cpu(int cpu, int resume) -{ - cpumask_t old; - cpumask_of_cpu_ptr(newmask, cpu); - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - old = current->cpus_allowed; - - set_cpus_allowed_ptr(current, newmask); - mutex_lock(µcode_mutex); - collect_cpu_info(cpu); - if (uci->valid && system_state == SYSTEM_RUNNING && !resume) - cpu_request_microcode(cpu); - mutex_unlock(µcode_mutex); - set_cpus_allowed_ptr(current, &old); -} - -static void microcode_fini_cpu(int cpu) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - mutex_lock(µcode_mutex); - uci->valid = 0; - vfree(uci->mc); - uci->mc = NULL; - mutex_unlock(µcode_mutex); -} - -static ssize_t reload_store(struct sys_device *dev, - struct sysdev_attribute *attr, - const char *buf, size_t sz) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; - char *end; - unsigned long val = simple_strtoul(buf, &end, 0); - int err = 0; - int cpu = dev->id; - - if (end == buf) - return -EINVAL; - if (val == 1) { - cpumask_t old; - cpumask_of_cpu_ptr(newmask, cpu); - - old = current->cpus_allowed; - - get_online_cpus(); - set_cpus_allowed_ptr(current, newmask); - - mutex_lock(µcode_mutex); - if (uci->valid) - err = cpu_request_microcode(cpu); - mutex_unlock(µcode_mutex); - put_online_cpus(); - set_cpus_allowed_ptr(current, &old); - } - if (err) - return err; - return sz; -} - -static ssize_t version_show(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; - - return sprintf(buf, "0x%x\n", uci->rev); -} - -static ssize_t pf_show(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; - - return sprintf(buf, "0x%x\n", uci->pf); -} - -static SYSDEV_ATTR(reload, 0200, NULL, reload_store); -static SYSDEV_ATTR(version, 0400, version_show, NULL); -static SYSDEV_ATTR(processor_flags, 0400, pf_show, NULL); - -static struct attribute *mc_default_attrs[] = { - &attr_reload.attr, - &attr_version.attr, - &attr_processor_flags.attr, - NULL -}; - -static struct attribute_group mc_attr_group = { - .attrs = mc_default_attrs, - .name = "microcode", -}; - -static int __mc_sysdev_add(struct sys_device *sys_dev, int resume) -{ - int err, cpu = sys_dev->id; - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - if (!cpu_online(cpu)) - return 0; - - pr_debug("microcode: CPU%d added\n", cpu); - memset(uci, 0, sizeof(*uci)); - - err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group); - if (err) - return err; - - microcode_init_cpu(cpu, resume); - - return 0; -} - -static int mc_sysdev_add(struct sys_device *sys_dev) -{ - return __mc_sysdev_add(sys_dev, 0); -} - -static int mc_sysdev_remove(struct sys_device *sys_dev) -{ - int cpu = sys_dev->id; - - if (!cpu_online(cpu)) - return 0; - - pr_debug("microcode: CPU%d removed\n", cpu); - microcode_fini_cpu(cpu); - sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); - return 0; -} - -static int mc_sysdev_resume(struct sys_device *dev) -{ - int cpu = dev->id; - - if (!cpu_online(cpu)) - return 0; - pr_debug("microcode: CPU%d resumed\n", cpu); - /* only CPU 0 will apply ucode here */ - apply_microcode(0); - return 0; -} - -static struct sysdev_driver mc_sysdev_driver = { - .add = mc_sysdev_add, - .remove = mc_sysdev_remove, - .resume = mc_sysdev_resume, -}; - -static __cpuinit int -mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) -{ - unsigned int cpu = (unsigned long)hcpu; - struct sys_device *sys_dev; - - sys_dev = get_cpu_sysdev(cpu); - switch (action) { - case CPU_UP_CANCELED_FROZEN: - /* The CPU refused to come up during a system resume */ - microcode_fini_cpu(cpu); - break; - case CPU_ONLINE: - case CPU_DOWN_FAILED: - mc_sysdev_add(sys_dev); - break; - case CPU_ONLINE_FROZEN: - /* System-wide resume is in progress, try to apply microcode */ - if (apply_microcode_check_cpu(cpu)) { - /* The application of microcode failed */ - microcode_fini_cpu(cpu); - __mc_sysdev_add(sys_dev, 1); - break; - } - case CPU_DOWN_FAILED_FROZEN: - if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group)) - printk(KERN_ERR "microcode: Failed to create the sysfs " - "group for CPU%d\n", cpu); - break; - case CPU_DOWN_PREPARE: - mc_sysdev_remove(sys_dev); - break; - case CPU_DOWN_PREPARE_FROZEN: - /* Suspend is in progress, only remove the interface */ - sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); - break; - } - return NOTIFY_OK; -} - -static struct notifier_block __refdata mc_cpu_notifier = { - .notifier_call = mc_cpu_callback, -}; - -static int __init microcode_init (void) -{ - int error; - - printk(KERN_INFO - "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " \n"); - - error = microcode_dev_init(); - if (error) - return error; - microcode_pdev = platform_device_register_simple("microcode", -1, - NULL, 0); - if (IS_ERR(microcode_pdev)) { - microcode_dev_exit(); - return PTR_ERR(microcode_pdev); - } - - get_online_cpus(); - error = sysdev_driver_register(&cpu_sysdev_class, &mc_sysdev_driver); - put_online_cpus(); - if (error) { - microcode_dev_exit(); - platform_device_unregister(microcode_pdev); - return error; - } - - register_hotcpu_notifier(&mc_cpu_notifier); - return 0; -} - -static void __exit microcode_exit (void) -{ - microcode_dev_exit(); - - unregister_hotcpu_notifier(&mc_cpu_notifier); - - get_online_cpus(); - sysdev_driver_unregister(&cpu_sysdev_class, &mc_sysdev_driver); - put_online_cpus(); - - platform_device_unregister(microcode_pdev); -} - -module_init(microcode_init) -module_exit(microcode_exit) diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c new file mode 100644 index 0000000..4e7b2f6 --- /dev/null +++ b/arch/x86/kernel/microcode_intel.c @@ -0,0 +1,855 @@ +/* + * Intel CPU Microcode Update Driver for Linux + * + * Copyright (C) 2000-2006 Tigran Aivazian + * 2006 Shaohua Li + * + * This driver allows to upgrade microcode on Intel processors + * belonging to IA-32 family - PentiumPro, Pentium II, + * Pentium III, Xeon, Pentium 4, etc. + * + * Reference: Section 8.11 of Volume 3a, IA-32 Intel? Architecture + * Software Developer's Manual + * Order Number 253668 or free download from: + * + * http://developer.intel.com/design/pentium4/manuals/253668.htm + * + * For more information, go to http://www.urbanmyth.org/microcode + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * 1.0 16 Feb 2000, Tigran Aivazian + * Initial release. + * 1.01 18 Feb 2000, Tigran Aivazian + * Added read() support + cleanups. + * 1.02 21 Feb 2000, Tigran Aivazian + * Added 'device trimming' support. open(O_WRONLY) zeroes + * and frees the saved copy of applied microcode. + * 1.03 29 Feb 2000, Tigran Aivazian + * Made to use devfs (/dev/cpu/microcode) + cleanups. + * 1.04 06 Jun 2000, Simon Trimmer + * Added misc device support (now uses both devfs and misc). + * Added MICROCODE_IOCFREE ioctl to clear memory. + * 1.05 09 Jun 2000, Simon Trimmer + * Messages for error cases (non Intel & no suitable microcode). + * 1.06 03 Aug 2000, Tigran Aivazian + * Removed ->release(). Removed exclusive open and status bitmap. + * Added microcode_rwsem to serialize read()/write()/ioctl(). + * Removed global kernel lock usage. + * 1.07 07 Sep 2000, Tigran Aivazian + * Write 0 to 0x8B msr and then cpuid before reading revision, + * so that it works even if there were no update done by the + * BIOS. Otherwise, reading from 0x8B gives junk (which happened + * to be 0 on my machine which is why it worked even when I + * disabled update by the BIOS) + * Thanks to Eric W. Biederman for the fix. + * 1.08 11 Dec 2000, Richard Schaal and + * Tigran Aivazian + * Intel Pentium 4 processor support and bugfixes. + * 1.09 30 Oct 2001, Tigran Aivazian + * Bugfix for HT (Hyper-Threading) enabled processors + * whereby processor resources are shared by all logical processors + * in a single CPU package. + * 1.10 28 Feb 2002 Asit K Mallick and + * Tigran Aivazian , + * Serialize updates as required on HT processors due to speculative + * nature of implementation. + * 1.11 22 Mar 2002 Tigran Aivazian + * Fix the panic when writing zero-length microcode chunk. + * 1.12 29 Sep 2003 Nitin Kamble , + * Jun Nakajima + * Support for the microcode updates in the new format. + * 1.13 10 Oct 2003 Tigran Aivazian + * Removed ->read() method and obsoleted MICROCODE_IOCFREE ioctl + * because we no longer hold a copy of applied microcode + * in kernel memory. + * 1.14 25 Jun 2004 Tigran Aivazian + * Fix sigmatch() macro to handle old CPUs with pf == 0. + * Thanks to Stuart Swales for pointing out this bug. + */ + +//#define DEBUG /* pr_debug */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver"); +MODULE_AUTHOR("Tigran Aivazian "); +MODULE_LICENSE("GPL"); + +#define MICROCODE_VERSION "1.14a" + +#define DEFAULT_UCODE_DATASIZE (2000) /* 2000 bytes */ +#define MC_HEADER_SIZE (sizeof (struct microcode_header)) /* 48 bytes */ +#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) /* 2048 bytes */ +#define EXT_HEADER_SIZE (sizeof (struct extended_sigtable)) /* 20 bytes */ +#define EXT_SIGNATURE_SIZE (sizeof (struct extended_signature)) /* 12 bytes */ +#define DWSIZE (sizeof (u32)) +#define get_totalsize(mc) \ + (((struct microcode *)mc)->hdr.totalsize ? \ + ((struct microcode *)mc)->hdr.totalsize : DEFAULT_UCODE_TOTALSIZE) +#define get_datasize(mc) \ + (((struct microcode *)mc)->hdr.datasize ? \ + ((struct microcode *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE) + +#define sigmatch(s1, s2, p1, p2) \ + (((s1) == (s2)) && (((p1) & (p2)) || (((p1) == 0) && ((p2) == 0)))) + +#define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE) + +/* serialize access to the physical write to MSR 0x79 */ +static DEFINE_SPINLOCK(microcode_update_lock); + +/* no concurrent ->write()s are allowed on /dev/cpu/microcode */ +static DEFINE_MUTEX(microcode_mutex); + +static struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; + +static void collect_cpu_info(int cpu_num) +{ + struct cpuinfo_x86 *c = &cpu_data(cpu_num); + struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; + unsigned int val[2]; + + /* We should bind the task to the CPU */ + BUG_ON(raw_smp_processor_id() != cpu_num); + uci->pf = uci->rev = 0; + uci->mc = NULL; + uci->valid = 1; + + if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || + cpu_has(c, X86_FEATURE_IA64)) { + printk(KERN_ERR "microcode: CPU%d not a capable Intel " + "processor\n", cpu_num); + uci->valid = 0; + return; + } + + uci->sig = cpuid_eax(0x00000001); + + if ((c->x86_model >= 5) || (c->x86 > 6)) { + /* get processor flags from MSR 0x17 */ + rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]); + uci->pf = 1 << ((val[1] >> 18) & 7); + } + + wrmsr(MSR_IA32_UCODE_REV, 0, 0); + /* see notes above for revision 1.07. Apparent chip bug */ + sync_core(); + /* get the current revision from MSR 0x8B */ + rdmsr(MSR_IA32_UCODE_REV, val[0], uci->rev); + pr_debug("microcode: collect_cpu_info : sig=0x%x, pf=0x%x, rev=0x%x\n", + uci->sig, uci->pf, uci->rev); +} + +static inline int microcode_update_match(int cpu_num, + struct microcode_header *mc_header, int sig, int pf) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; + + if (!sigmatch(sig, uci->sig, pf, uci->pf) + || mc_header->rev <= uci->rev) + return 0; + return 1; +} + +static int microcode_sanity_check(void *mc) +{ + struct microcode_header *mc_header = mc; + struct extended_sigtable *ext_header = NULL; + struct extended_signature *ext_sig; + unsigned long total_size, data_size, ext_table_size; + int sum, orig_sum, ext_sigcount = 0, i; + + total_size = get_totalsize(mc_header); + data_size = get_datasize(mc_header); + if (data_size + MC_HEADER_SIZE > total_size) { + printk(KERN_ERR "microcode: error! " + "Bad data size in microcode data file\n"); + return -EINVAL; + } + + if (mc_header->ldrver != 1 || mc_header->hdrver != 1) { + printk(KERN_ERR "microcode: error! " + "Unknown microcode update format\n"); + return -EINVAL; + } + ext_table_size = total_size - (MC_HEADER_SIZE + data_size); + if (ext_table_size) { + if ((ext_table_size < EXT_HEADER_SIZE) + || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) { + printk(KERN_ERR "microcode: error! " + "Small exttable size in microcode data file\n"); + return -EINVAL; + } + ext_header = mc + MC_HEADER_SIZE + data_size; + if (ext_table_size != exttable_size(ext_header)) { + printk(KERN_ERR "microcode: error! " + "Bad exttable size in microcode data file\n"); + return -EFAULT; + } + ext_sigcount = ext_header->count; + } + + /* check extended table checksum */ + if (ext_table_size) { + int ext_table_sum = 0; + int *ext_tablep = (int *)ext_header; + + i = ext_table_size / DWSIZE; + while (i--) + ext_table_sum += ext_tablep[i]; + if (ext_table_sum) { + printk(KERN_WARNING "microcode: aborting, " + "bad extended signature table checksum\n"); + return -EINVAL; + } + } + + /* calculate the checksum */ + orig_sum = 0; + i = (MC_HEADER_SIZE + data_size) / DWSIZE; + while (i--) + orig_sum += ((int *)mc)[i]; + if (orig_sum) { + printk(KERN_ERR "microcode: aborting, bad checksum\n"); + return -EINVAL; + } + if (!ext_table_size) + return 0; + /* check extended signature checksum */ + for (i = 0; i < ext_sigcount; i++) { + ext_sig = (void *)ext_header + EXT_HEADER_SIZE + + EXT_SIGNATURE_SIZE * i; + sum = orig_sum + - (mc_header->sig + mc_header->pf + mc_header->cksum) + + (ext_sig->sig + ext_sig->pf + ext_sig->cksum); + if (sum) { + printk(KERN_ERR "microcode: aborting, bad checksum\n"); + return -EINVAL; + } + } + return 0; +} + +/* + * return 0 - no update found + * return 1 - found update + * return < 0 - error + */ +static int get_maching_microcode(void *mc, int cpu) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + struct microcode_header *mc_header = mc; + struct extended_sigtable *ext_header; + unsigned long total_size = get_totalsize(mc_header); + int ext_sigcount, i; + struct extended_signature *ext_sig; + void *new_mc; + + if (microcode_update_match(cpu, mc_header, + mc_header->sig, mc_header->pf)) + goto find; + + if (total_size <= get_datasize(mc_header) + MC_HEADER_SIZE) + return 0; + + ext_header = mc + get_datasize(mc_header) + MC_HEADER_SIZE; + ext_sigcount = ext_header->count; + ext_sig = (void *)ext_header + EXT_HEADER_SIZE; + for (i = 0; i < ext_sigcount; i++) { + if (microcode_update_match(cpu, mc_header, + ext_sig->sig, ext_sig->pf)) + goto find; + ext_sig++; + } + return 0; +find: + pr_debug("microcode: CPU%d found a matching microcode update with" + " version 0x%x (current=0x%x)\n", cpu, mc_header->rev,uci->rev); + new_mc = vmalloc(total_size); + if (!new_mc) { + printk(KERN_ERR "microcode: error! Can not allocate memory\n"); + return -ENOMEM; + } + + /* free previous update file */ + vfree(uci->mc); + + memcpy(new_mc, mc, total_size); + uci->mc = new_mc; + return 1; +} + +static void apply_microcode(int cpu) +{ + unsigned long flags; + unsigned int val[2]; + int cpu_num = raw_smp_processor_id(); + struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; + + /* We should bind the task to the CPU */ + BUG_ON(cpu_num != cpu); + + if (uci->mc == NULL) + return; + + /* serialize access to the physical write to MSR 0x79 */ + spin_lock_irqsave(µcode_update_lock, flags); + + /* write microcode via MSR 0x79 */ + wrmsr(MSR_IA32_UCODE_WRITE, + (unsigned long) uci->mc->bits, + (unsigned long) uci->mc->bits >> 16 >> 16); + wrmsr(MSR_IA32_UCODE_REV, 0, 0); + + /* see notes above for revision 1.07. Apparent chip bug */ + sync_core(); + + /* get the current revision from MSR 0x8B */ + rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); + + spin_unlock_irqrestore(µcode_update_lock, flags); + if (val[1] != uci->mc->hdr.rev) { + printk(KERN_ERR "microcode: CPU%d update from revision " + "0x%x to 0x%x failed\n", cpu_num, uci->rev, val[1]); + return; + } + printk(KERN_INFO "microcode: CPU%d updated from revision " + "0x%x to 0x%x, date = %08x \n", + cpu_num, uci->rev, val[1], uci->mc->hdr.date); + uci->rev = val[1]; +} + +#ifdef CONFIG_MICROCODE_OLD_INTERFACE +static void __user *user_buffer; /* user area microcode data buffer */ +static unsigned int user_buffer_size; /* it's size */ + +static long get_next_ucode(void **mc, long offset) +{ + struct microcode_header mc_header; + unsigned long total_size; + + /* No more data */ + if (offset >= user_buffer_size) + return 0; + if (copy_from_user(&mc_header, user_buffer + offset, MC_HEADER_SIZE)) { + printk(KERN_ERR "microcode: error! Can not read user data\n"); + return -EFAULT; + } + total_size = get_totalsize(&mc_header); + if (offset + total_size > user_buffer_size) { + printk(KERN_ERR "microcode: error! Bad total size in microcode " + "data file\n"); + return -EINVAL; + } + *mc = vmalloc(total_size); + if (!*mc) + return -ENOMEM; + if (copy_from_user(*mc, user_buffer + offset, total_size)) { + printk(KERN_ERR "microcode: error! Can not read user data\n"); + vfree(*mc); + return -EFAULT; + } + return offset + total_size; +} + +static int do_microcode_update (void) +{ + long cursor = 0; + int error = 0; + void *new_mc = NULL; + int cpu; + cpumask_t old; + cpumask_of_cpu_ptr_declare(newmask); + + old = current->cpus_allowed; + + while ((cursor = get_next_ucode(&new_mc, cursor)) > 0) { + error = microcode_sanity_check(new_mc); + if (error) + goto out; + /* + * It's possible the data file has multiple matching ucode, + * lets keep searching till the latest version + */ + for_each_online_cpu(cpu) { + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + if (!uci->valid) + continue; + cpumask_of_cpu_ptr_next(newmask, cpu); + set_cpus_allowed_ptr(current, newmask); + error = get_maching_microcode(new_mc, cpu); + if (error < 0) + goto out; + if (error == 1) + apply_microcode(cpu); + } + vfree(new_mc); + } +out: + if (cursor > 0) + vfree(new_mc); + if (cursor < 0) + error = cursor; + set_cpus_allowed_ptr(current, &old); + return error; +} + +static int microcode_open (struct inode *unused1, struct file *unused2) +{ + cycle_kernel_lock(); + return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; +} + +static ssize_t microcode_write (struct file *file, const char __user *buf, size_t len, loff_t *ppos) +{ + ssize_t ret; + + if ((len >> PAGE_SHIFT) > num_physpages) { + printk(KERN_ERR "microcode: too much data (max %ld pages)\n", num_physpages); + return -EINVAL; + } + + get_online_cpus(); + mutex_lock(µcode_mutex); + + user_buffer = (void __user *) buf; + user_buffer_size = (int) len; + + ret = do_microcode_update(); + if (!ret) + ret = (ssize_t)len; + + mutex_unlock(µcode_mutex); + put_online_cpus(); + + return ret; +} + +static const struct file_operations microcode_fops = { + .owner = THIS_MODULE, + .write = microcode_write, + .open = microcode_open, +}; + +static struct miscdevice microcode_dev = { + .minor = MICROCODE_MINOR, + .name = "microcode", + .fops = µcode_fops, +}; + +static int __init microcode_dev_init (void) +{ + int error; + + error = misc_register(µcode_dev); + if (error) { + printk(KERN_ERR + "microcode: can't misc_register on minor=%d\n", + MICROCODE_MINOR); + return error; + } + + return 0; +} + +static void microcode_dev_exit (void) +{ + misc_deregister(µcode_dev); +} + +MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); +#else +#define microcode_dev_init() 0 +#define microcode_dev_exit() do { } while(0) +#endif + +static long get_next_ucode_from_buffer(void **mc, const u8 *buf, + unsigned long size, long offset) +{ + struct microcode_header *mc_header; + unsigned long total_size; + + /* No more data */ + if (offset >= size) + return 0; + mc_header = (struct microcode_header *)(buf + offset); + total_size = get_totalsize(mc_header); + + if (offset + total_size > size) { + printk(KERN_ERR "microcode: error! Bad data in microcode data file\n"); + return -EINVAL; + } + + *mc = vmalloc(total_size); + if (!*mc) { + printk(KERN_ERR "microcode: error! Can not allocate memory\n"); + return -ENOMEM; + } + memcpy(*mc, buf + offset, total_size); + return offset + total_size; +} + +/* fake device for request_firmware */ +static struct platform_device *microcode_pdev; + +static int cpu_request_microcode(int cpu) +{ + char name[30]; + struct cpuinfo_x86 *c = &cpu_data(cpu); + const struct firmware *firmware; + const u8 *buf; + unsigned long size; + long offset = 0; + int error; + void *mc; + + /* We should bind the task to the CPU */ + BUG_ON(cpu != raw_smp_processor_id()); + sprintf(name,"intel-ucode/%02x-%02x-%02x", + c->x86, c->x86_model, c->x86_mask); + error = request_firmware(&firmware, name, µcode_pdev->dev); + if (error) { + pr_debug("microcode: data file %s load failed\n", name); + return error; + } + buf = firmware->data; + size = firmware->size; + while ((offset = get_next_ucode_from_buffer(&mc, buf, size, offset)) + > 0) { + error = microcode_sanity_check(mc); + if (error) + break; + error = get_maching_microcode(mc, cpu); + if (error < 0) + break; + /* + * It's possible the data file has multiple matching ucode, + * lets keep searching till the latest version + */ + if (error == 1) { + apply_microcode(cpu); + error = 0; + } + vfree(mc); + } + if (offset > 0) + vfree(mc); + if (offset < 0) + error = offset; + release_firmware(firmware); + + return error; +} + +static int apply_microcode_check_cpu(int cpu) +{ + struct cpuinfo_x86 *c = &cpu_data(cpu); + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + cpumask_t old; + cpumask_of_cpu_ptr(newmask, cpu); + unsigned int val[2]; + int err = 0; + + /* Check if the microcode is available */ + if (!uci->mc) + return 0; + + old = current->cpus_allowed; + set_cpus_allowed_ptr(current, newmask); + + /* Check if the microcode we have in memory matches the CPU */ + if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || + cpu_has(c, X86_FEATURE_IA64) || uci->sig != cpuid_eax(0x00000001)) + err = -EINVAL; + + if (!err && ((c->x86_model >= 5) || (c->x86 > 6))) { + /* get processor flags from MSR 0x17 */ + rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]); + if (uci->pf != (1 << ((val[1] >> 18) & 7))) + err = -EINVAL; + } + + if (!err) { + wrmsr(MSR_IA32_UCODE_REV, 0, 0); + /* see notes above for revision 1.07. Apparent chip bug */ + sync_core(); + /* get the current revision from MSR 0x8B */ + rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); + if (uci->rev != val[1]) + err = -EINVAL; + } + + if (!err) + apply_microcode(cpu); + else + printk(KERN_ERR "microcode: Could not apply microcode to CPU%d:" + " sig=0x%x, pf=0x%x, rev=0x%x\n", + cpu, uci->sig, uci->pf, uci->rev); + + set_cpus_allowed_ptr(current, &old); + return err; +} + +static void microcode_init_cpu(int cpu, int resume) +{ + cpumask_t old; + cpumask_of_cpu_ptr(newmask, cpu); + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + old = current->cpus_allowed; + + set_cpus_allowed_ptr(current, newmask); + mutex_lock(µcode_mutex); + collect_cpu_info(cpu); + if (uci->valid && system_state == SYSTEM_RUNNING && !resume) + cpu_request_microcode(cpu); + mutex_unlock(µcode_mutex); + set_cpus_allowed_ptr(current, &old); +} + +static void microcode_fini_cpu(int cpu) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + mutex_lock(µcode_mutex); + uci->valid = 0; + vfree(uci->mc); + uci->mc = NULL; + mutex_unlock(µcode_mutex); +} + +static ssize_t reload_store(struct sys_device *dev, + struct sysdev_attribute *attr, + const char *buf, size_t sz) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; + char *end; + unsigned long val = simple_strtoul(buf, &end, 0); + int err = 0; + int cpu = dev->id; + + if (end == buf) + return -EINVAL; + if (val == 1) { + cpumask_t old; + cpumask_of_cpu_ptr(newmask, cpu); + + old = current->cpus_allowed; + + get_online_cpus(); + set_cpus_allowed_ptr(current, newmask); + + mutex_lock(µcode_mutex); + if (uci->valid) + err = cpu_request_microcode(cpu); + mutex_unlock(µcode_mutex); + put_online_cpus(); + set_cpus_allowed_ptr(current, &old); + } + if (err) + return err; + return sz; +} + +static ssize_t version_show(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; + + return sprintf(buf, "0x%x\n", uci->rev); +} + +static ssize_t pf_show(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; + + return sprintf(buf, "0x%x\n", uci->pf); +} + +static SYSDEV_ATTR(reload, 0200, NULL, reload_store); +static SYSDEV_ATTR(version, 0400, version_show, NULL); +static SYSDEV_ATTR(processor_flags, 0400, pf_show, NULL); + +static struct attribute *mc_default_attrs[] = { + &attr_reload.attr, + &attr_version.attr, + &attr_processor_flags.attr, + NULL +}; + +static struct attribute_group mc_attr_group = { + .attrs = mc_default_attrs, + .name = "microcode", +}; + +static int __mc_sysdev_add(struct sys_device *sys_dev, int resume) +{ + int err, cpu = sys_dev->id; + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + if (!cpu_online(cpu)) + return 0; + + pr_debug("microcode: CPU%d added\n", cpu); + memset(uci, 0, sizeof(*uci)); + + err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group); + if (err) + return err; + + microcode_init_cpu(cpu, resume); + + return 0; +} + +static int mc_sysdev_add(struct sys_device *sys_dev) +{ + return __mc_sysdev_add(sys_dev, 0); +} + +static int mc_sysdev_remove(struct sys_device *sys_dev) +{ + int cpu = sys_dev->id; + + if (!cpu_online(cpu)) + return 0; + + pr_debug("microcode: CPU%d removed\n", cpu); + microcode_fini_cpu(cpu); + sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); + return 0; +} + +static int mc_sysdev_resume(struct sys_device *dev) +{ + int cpu = dev->id; + + if (!cpu_online(cpu)) + return 0; + pr_debug("microcode: CPU%d resumed\n", cpu); + /* only CPU 0 will apply ucode here */ + apply_microcode(0); + return 0; +} + +static struct sysdev_driver mc_sysdev_driver = { + .add = mc_sysdev_add, + .remove = mc_sysdev_remove, + .resume = mc_sysdev_resume, +}; + +static __cpuinit int +mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + struct sys_device *sys_dev; + + sys_dev = get_cpu_sysdev(cpu); + switch (action) { + case CPU_UP_CANCELED_FROZEN: + /* The CPU refused to come up during a system resume */ + microcode_fini_cpu(cpu); + break; + case CPU_ONLINE: + case CPU_DOWN_FAILED: + mc_sysdev_add(sys_dev); + break; + case CPU_ONLINE_FROZEN: + /* System-wide resume is in progress, try to apply microcode */ + if (apply_microcode_check_cpu(cpu)) { + /* The application of microcode failed */ + microcode_fini_cpu(cpu); + __mc_sysdev_add(sys_dev, 1); + break; + } + case CPU_DOWN_FAILED_FROZEN: + if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group)) + printk(KERN_ERR "microcode: Failed to create the sysfs " + "group for CPU%d\n", cpu); + break; + case CPU_DOWN_PREPARE: + mc_sysdev_remove(sys_dev); + break; + case CPU_DOWN_PREPARE_FROZEN: + /* Suspend is in progress, only remove the interface */ + sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); + break; + } + return NOTIFY_OK; +} + +static struct notifier_block __refdata mc_cpu_notifier = { + .notifier_call = mc_cpu_callback, +}; + +static int __init microcode_init (void) +{ + int error; + + printk(KERN_INFO + "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " \n"); + + error = microcode_dev_init(); + if (error) + return error; + microcode_pdev = platform_device_register_simple("microcode", -1, + NULL, 0); + if (IS_ERR(microcode_pdev)) { + microcode_dev_exit(); + return PTR_ERR(microcode_pdev); + } + + get_online_cpus(); + error = sysdev_driver_register(&cpu_sysdev_class, &mc_sysdev_driver); + put_online_cpus(); + if (error) { + microcode_dev_exit(); + platform_device_unregister(microcode_pdev); + return error; + } + + register_hotcpu_notifier(&mc_cpu_notifier); + return 0; +} + +static void __exit microcode_exit (void) +{ + microcode_dev_exit(); + + unregister_hotcpu_notifier(&mc_cpu_notifier); + + get_online_cpus(); + sysdev_driver_unregister(&cpu_sysdev_class, &mc_sysdev_driver); + put_online_cpus(); + + platform_device_unregister(microcode_pdev); +} + +module_init(microcode_init) +module_exit(microcode_exit) -- cgit v0.10.2 From 3e135d887c973b525d43fbb67dfc5972694882f6 Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Mon, 28 Jul 2008 18:44:17 +0200 Subject: x86: code split to two parts Split off existing code into two seperate files. One file holds general code, the other file vendor specific parts. No functional changes, only refactoring. Temporarily Introduced a new module name 'ucode' for result, due to already taken name 'microcode'. Signed-off-by: Peter Oruba Cc: Tigran Aivazian Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index a9be2a0..abb32ae 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -51,7 +51,8 @@ obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o obj-$(CONFIG_MCA) += mca_32.o obj-$(CONFIG_X86_MSR) += msr.o obj-$(CONFIG_X86_CPUID) += cpuid.o -obj-$(CONFIG_MICROCODE) += microcode_intel.o +obj-$(CONFIG_MICROCODE) += ucode.o +ucode-objs := microcode.o microcode_intel.o obj-$(CONFIG_PCI) += early-quirks.o apm-y := apm_32.o obj-$(CONFIG_APM) += apm.o diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c new file mode 100644 index 0000000..c1047d7 --- /dev/null +++ b/arch/x86/kernel/microcode.c @@ -0,0 +1,463 @@ +/* + * Intel CPU Microcode Update Driver for Linux + * + * Copyright (C) 2000-2006 Tigran Aivazian + * 2006 Shaohua Li + * + * This driver allows to upgrade microcode on Intel processors + * belonging to IA-32 family - PentiumPro, Pentium II, + * Pentium III, Xeon, Pentium 4, etc. + * + * Reference: Section 8.11 of Volume 3a, IA-32 Intel? Architecture + * Software Developer's Manual + * Order Number 253668 or free download from: + * + * http://developer.intel.com/design/pentium4/manuals/253668.htm + * + * For more information, go to http://www.urbanmyth.org/microcode + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * 1.0 16 Feb 2000, Tigran Aivazian + * Initial release. + * 1.01 18 Feb 2000, Tigran Aivazian + * Added read() support + cleanups. + * 1.02 21 Feb 2000, Tigran Aivazian + * Added 'device trimming' support. open(O_WRONLY) zeroes + * and frees the saved copy of applied microcode. + * 1.03 29 Feb 2000, Tigran Aivazian + * Made to use devfs (/dev/cpu/microcode) + cleanups. + * 1.04 06 Jun 2000, Simon Trimmer + * Added misc device support (now uses both devfs and misc). + * Added MICROCODE_IOCFREE ioctl to clear memory. + * 1.05 09 Jun 2000, Simon Trimmer + * Messages for error cases (non Intel & no suitable microcode). + * 1.06 03 Aug 2000, Tigran Aivazian + * Removed ->release(). Removed exclusive open and status bitmap. + * Added microcode_rwsem to serialize read()/write()/ioctl(). + * Removed global kernel lock usage. + * 1.07 07 Sep 2000, Tigran Aivazian + * Write 0 to 0x8B msr and then cpuid before reading revision, + * so that it works even if there were no update done by the + * BIOS. Otherwise, reading from 0x8B gives junk (which happened + * to be 0 on my machine which is why it worked even when I + * disabled update by the BIOS) + * Thanks to Eric W. Biederman for the fix. + * 1.08 11 Dec 2000, Richard Schaal and + * Tigran Aivazian + * Intel Pentium 4 processor support and bugfixes. + * 1.09 30 Oct 2001, Tigran Aivazian + * Bugfix for HT (Hyper-Threading) enabled processors + * whereby processor resources are shared by all logical processors + * in a single CPU package. + * 1.10 28 Feb 2002 Asit K Mallick and + * Tigran Aivazian , + * Serialize updates as required on HT processors due to speculative + * nature of implementation. + * 1.11 22 Mar 2002 Tigran Aivazian + * Fix the panic when writing zero-length microcode chunk. + * 1.12 29 Sep 2003 Nitin Kamble , + * Jun Nakajima + * Support for the microcode updates in the new format. + * 1.13 10 Oct 2003 Tigran Aivazian + * Removed ->read() method and obsoleted MICROCODE_IOCFREE ioctl + * because we no longer hold a copy of applied microcode + * in kernel memory. + * 1.14 25 Jun 2004 Tigran Aivazian + * Fix sigmatch() macro to handle old CPUs with pf == 0. + * Thanks to Stuart Swales for pointing out this bug. + */ + +//#define DEBUG /* pr_debug */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +MODULE_DESCRIPTION("Microcode Update Driver"); +MODULE_AUTHOR("Tigran Aivazian "); +MODULE_LICENSE("GPL"); + +#define MICROCODE_VERSION "1.14a" + +/* no concurrent ->write()s are allowed on /dev/cpu/microcode */ +DEFINE_MUTEX(microcode_mutex); + +struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; + +extern long get_next_ucode(void **mc, long offset); +extern int microcode_sanity_check(void *mc); +extern int get_matching_microcode(void *mc, int cpu); +extern void collect_cpu_info(int cpu_num); +extern int cpu_request_microcode(int cpu); +extern void microcode_fini_cpu(int cpu); +extern void apply_microcode(int cpu); +extern int apply_microcode_check_cpu(int cpu); + +#ifdef CONFIG_MICROCODE_OLD_INTERFACE +void __user *user_buffer; /* user area microcode data buffer */ +unsigned int user_buffer_size; /* it's size */ + +static int do_microcode_update (void) +{ + long cursor = 0; + int error = 0; + void *new_mc = NULL; + int cpu; + cpumask_t old; + cpumask_of_cpu_ptr_declare(newmask); + + old = current->cpus_allowed; + + while ((cursor = get_next_ucode(&new_mc, cursor)) > 0) { + error = microcode_sanity_check(new_mc); + if (error) + goto out; + /* + * It's possible the data file has multiple matching ucode, + * lets keep searching till the latest version + */ + for_each_online_cpu(cpu) { + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + if (!uci->valid) + continue; + cpumask_of_cpu_ptr_next(newmask, cpu); + set_cpus_allowed_ptr(current, newmask); + error = get_maching_microcode(new_mc, cpu); + if (error < 0) + goto out; + if (error == 1) + apply_microcode(cpu); + } + vfree(new_mc); + } +out: + if (cursor > 0) + vfree(new_mc); + if (cursor < 0) + error = cursor; + set_cpus_allowed_ptr(current, &old); + return error; +} + +static int microcode_open (struct inode *unused1, struct file *unused2) +{ + cycle_kernel_lock(); + return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; +} + +static ssize_t microcode_write (struct file *file, const char __user *buf, size_t len, loff_t *ppos) +{ + ssize_t ret; + + if ((len >> PAGE_SHIFT) > num_physpages) { + printk(KERN_ERR "microcode: too much data (max %ld pages)\n", num_physpages); + return -EINVAL; + } + + get_online_cpus(); + mutex_lock(µcode_mutex); + + user_buffer = (void __user *) buf; + user_buffer_size = (int) len; + + ret = do_microcode_update(); + if (!ret) + ret = (ssize_t)len; + + mutex_unlock(µcode_mutex); + put_online_cpus(); + + return ret; +} + +static const struct file_operations microcode_fops = { + .owner = THIS_MODULE, + .write = microcode_write, + .open = microcode_open, +}; + +static struct miscdevice microcode_dev = { + .minor = MICROCODE_MINOR, + .name = "microcode", + .fops = µcode_fops, +}; + +static int __init microcode_dev_init (void) +{ + int error; + + error = misc_register(µcode_dev); + if (error) { + printk(KERN_ERR + "microcode: can't misc_register on minor=%d\n", + MICROCODE_MINOR); + return error; + } + + return 0; +} + +static void microcode_dev_exit (void) +{ + misc_deregister(µcode_dev); +} + +MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); +#else +#define microcode_dev_init() 0 +#define microcode_dev_exit() do { } while(0) +#endif + +/* fake device for request_firmware */ +struct platform_device *microcode_pdev; + +static void microcode_init_cpu(int cpu, int resume) +{ + cpumask_t old; + cpumask_of_cpu_ptr(newmask, cpu); + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + old = current->cpus_allowed; + + set_cpus_allowed_ptr(current, newmask); + mutex_lock(µcode_mutex); + collect_cpu_info(cpu); + if (uci->valid && system_state == SYSTEM_RUNNING && !resume) + cpu_request_microcode(cpu); + mutex_unlock(µcode_mutex); + set_cpus_allowed_ptr(current, &old); +} + +static ssize_t reload_store(struct sys_device *dev, + struct sysdev_attribute *attr, + const char *buf, size_t sz) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; + char *end; + unsigned long val = simple_strtoul(buf, &end, 0); + int err = 0; + int cpu = dev->id; + + if (end == buf) + return -EINVAL; + if (val == 1) { + cpumask_t old; + cpumask_of_cpu_ptr(newmask, cpu); + + old = current->cpus_allowed; + + get_online_cpus(); + set_cpus_allowed_ptr(current, newmask); + + mutex_lock(µcode_mutex); + if (uci->valid) + err = cpu_request_microcode(cpu); + mutex_unlock(µcode_mutex); + put_online_cpus(); + set_cpus_allowed_ptr(current, &old); + } + if (err) + return err; + return sz; +} + +static ssize_t version_show(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; + + return sprintf(buf, "0x%x\n", uci->rev); +} + +static ssize_t pf_show(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; + + return sprintf(buf, "0x%x\n", uci->pf); +} + +static SYSDEV_ATTR(reload, 0200, NULL, reload_store); +static SYSDEV_ATTR(version, 0400, version_show, NULL); +static SYSDEV_ATTR(processor_flags, 0400, pf_show, NULL); + +static struct attribute *mc_default_attrs[] = { + &attr_reload.attr, + &attr_version.attr, + &attr_processor_flags.attr, + NULL +}; + +static struct attribute_group mc_attr_group = { + .attrs = mc_default_attrs, + .name = "microcode", +}; + +static int __mc_sysdev_add(struct sys_device *sys_dev, int resume) +{ + int err, cpu = sys_dev->id; + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + if (!cpu_online(cpu)) + return 0; + + pr_debug("microcode: CPU%d added\n", cpu); + memset(uci, 0, sizeof(*uci)); + + err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group); + if (err) + return err; + + microcode_init_cpu(cpu, resume); + + return 0; +} + +static int mc_sysdev_add(struct sys_device *sys_dev) +{ + return __mc_sysdev_add(sys_dev, 0); +} + +static int mc_sysdev_remove(struct sys_device *sys_dev) +{ + int cpu = sys_dev->id; + + if (!cpu_online(cpu)) + return 0; + + pr_debug("microcode: CPU%d removed\n", cpu); + microcode_fini_cpu(cpu); + sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); + return 0; +} + +static int mc_sysdev_resume(struct sys_device *dev) +{ + int cpu = dev->id; + + if (!cpu_online(cpu)) + return 0; + pr_debug("microcode: CPU%d resumed\n", cpu); + /* only CPU 0 will apply ucode here */ + apply_microcode(0); + return 0; +} + +static struct sysdev_driver mc_sysdev_driver = { + .add = mc_sysdev_add, + .remove = mc_sysdev_remove, + .resume = mc_sysdev_resume, +}; + +static __cpuinit int +mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + struct sys_device *sys_dev; + + sys_dev = get_cpu_sysdev(cpu); + switch (action) { + case CPU_UP_CANCELED_FROZEN: + /* The CPU refused to come up during a system resume */ + microcode_fini_cpu(cpu); + break; + case CPU_ONLINE: + case CPU_DOWN_FAILED: + mc_sysdev_add(sys_dev); + break; + case CPU_ONLINE_FROZEN: + /* System-wide resume is in progress, try to apply microcode */ + if (apply_microcode_check_cpu(cpu)) { + /* The application of microcode failed */ + microcode_fini_cpu(cpu); + __mc_sysdev_add(sys_dev, 1); + break; + } + case CPU_DOWN_FAILED_FROZEN: + if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group)) + printk(KERN_ERR "microcode: Failed to create the sysfs " + "group for CPU%d\n", cpu); + break; + case CPU_DOWN_PREPARE: + mc_sysdev_remove(sys_dev); + break; + case CPU_DOWN_PREPARE_FROZEN: + /* Suspend is in progress, only remove the interface */ + sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); + break; + } + return NOTIFY_OK; +} + +static struct notifier_block __refdata mc_cpu_notifier = { + .notifier_call = mc_cpu_callback, +}; + +static int __init microcode_init (void) +{ + int error; + + printk(KERN_INFO + "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " \n"); + + error = microcode_dev_init(); + if (error) + return error; + microcode_pdev = platform_device_register_simple("microcode", -1, + NULL, 0); + if (IS_ERR(microcode_pdev)) { + microcode_dev_exit(); + return PTR_ERR(microcode_pdev); + } + + get_online_cpus(); + error = sysdev_driver_register(&cpu_sysdev_class, &mc_sysdev_driver); + put_online_cpus(); + if (error) { + microcode_dev_exit(); + platform_device_unregister(microcode_pdev); + return error; + } + + register_hotcpu_notifier(&mc_cpu_notifier); + return 0; +} + +static void __exit microcode_exit (void) +{ + microcode_dev_exit(); + + unregister_hotcpu_notifier(&mc_cpu_notifier); + + get_online_cpus(); + sysdev_driver_unregister(&cpu_sysdev_class, &mc_sysdev_driver); + put_online_cpus(); + + platform_device_unregister(microcode_pdev); +} + +module_init(microcode_init) +module_exit(microcode_exit) diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index 4e7b2f6..eded0a1 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c @@ -95,18 +95,16 @@ #include #include -MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver"); +MODULE_DESCRIPTION("Microcode Update Driver"); MODULE_AUTHOR("Tigran Aivazian "); MODULE_LICENSE("GPL"); -#define MICROCODE_VERSION "1.14a" - #define DEFAULT_UCODE_DATASIZE (2000) /* 2000 bytes */ -#define MC_HEADER_SIZE (sizeof (struct microcode_header)) /* 48 bytes */ +#define MC_HEADER_SIZE (sizeof(struct microcode_header)) /* 48 bytes */ #define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) /* 2048 bytes */ -#define EXT_HEADER_SIZE (sizeof (struct extended_sigtable)) /* 20 bytes */ -#define EXT_SIGNATURE_SIZE (sizeof (struct extended_signature)) /* 12 bytes */ -#define DWSIZE (sizeof (u32)) +#define EXT_HEADER_SIZE (sizeof(struct extended_sigtable)) /* 20 bytes */ +#define EXT_SIGNATURE_SIZE (sizeof(struct extended_signature)) /* 12 bytes */ +#define DWSIZE (sizeof(u32)) #define get_totalsize(mc) \ (((struct microcode *)mc)->hdr.totalsize ? \ ((struct microcode *)mc)->hdr.totalsize : DEFAULT_UCODE_TOTALSIZE) @@ -123,11 +121,11 @@ MODULE_LICENSE("GPL"); static DEFINE_SPINLOCK(microcode_update_lock); /* no concurrent ->write()s are allowed on /dev/cpu/microcode */ -static DEFINE_MUTEX(microcode_mutex); +extern struct mutex microcode_mutex; -static struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; +extern struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; -static void collect_cpu_info(int cpu_num) +void collect_cpu_info(int cpu_num) { struct cpuinfo_x86 *c = &cpu_data(cpu_num); struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; @@ -140,7 +138,7 @@ static void collect_cpu_info(int cpu_num) uci->valid = 1; if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || - cpu_has(c, X86_FEATURE_IA64)) { + cpu_has(c, X86_FEATURE_IA64)) { printk(KERN_ERR "microcode: CPU%d not a capable Intel " "processor\n", cpu_num); uci->valid = 0; @@ -175,7 +173,7 @@ static inline int microcode_update_match(int cpu_num, return 1; } -static int microcode_sanity_check(void *mc) +int microcode_sanity_check(void *mc) { struct microcode_header *mc_header = mc; struct extended_sigtable *ext_header = NULL; @@ -259,7 +257,7 @@ static int microcode_sanity_check(void *mc) * return 1 - found update * return < 0 - error */ -static int get_maching_microcode(void *mc, int cpu) +int get_matching_microcode(void *mc, int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; struct microcode_header *mc_header = mc; @@ -288,7 +286,7 @@ static int get_maching_microcode(void *mc, int cpu) return 0; find: pr_debug("microcode: CPU%d found a matching microcode update with" - " version 0x%x (current=0x%x)\n", cpu, mc_header->rev,uci->rev); + " version 0x%x (current=0x%x)\n", cpu, mc_header->rev, uci->rev); new_mc = vmalloc(total_size); if (!new_mc) { printk(KERN_ERR "microcode: error! Can not allocate memory\n"); @@ -303,7 +301,7 @@ find: return 1; } -static void apply_microcode(int cpu) +void apply_microcode(int cpu) { unsigned long flags; unsigned int val[2]; @@ -344,10 +342,10 @@ static void apply_microcode(int cpu) } #ifdef CONFIG_MICROCODE_OLD_INTERFACE -static void __user *user_buffer; /* user area microcode data buffer */ -static unsigned int user_buffer_size; /* it's size */ +extern void __user *user_buffer; /* user area microcode data buffer */ +extern unsigned int user_buffer_size; /* it's size */ -static long get_next_ucode(void **mc, long offset) +long get_next_ucode(void **mc, long offset) { struct microcode_header mc_header; unsigned long total_size; @@ -375,117 +373,6 @@ static long get_next_ucode(void **mc, long offset) } return offset + total_size; } - -static int do_microcode_update (void) -{ - long cursor = 0; - int error = 0; - void *new_mc = NULL; - int cpu; - cpumask_t old; - cpumask_of_cpu_ptr_declare(newmask); - - old = current->cpus_allowed; - - while ((cursor = get_next_ucode(&new_mc, cursor)) > 0) { - error = microcode_sanity_check(new_mc); - if (error) - goto out; - /* - * It's possible the data file has multiple matching ucode, - * lets keep searching till the latest version - */ - for_each_online_cpu(cpu) { - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - if (!uci->valid) - continue; - cpumask_of_cpu_ptr_next(newmask, cpu); - set_cpus_allowed_ptr(current, newmask); - error = get_maching_microcode(new_mc, cpu); - if (error < 0) - goto out; - if (error == 1) - apply_microcode(cpu); - } - vfree(new_mc); - } -out: - if (cursor > 0) - vfree(new_mc); - if (cursor < 0) - error = cursor; - set_cpus_allowed_ptr(current, &old); - return error; -} - -static int microcode_open (struct inode *unused1, struct file *unused2) -{ - cycle_kernel_lock(); - return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; -} - -static ssize_t microcode_write (struct file *file, const char __user *buf, size_t len, loff_t *ppos) -{ - ssize_t ret; - - if ((len >> PAGE_SHIFT) > num_physpages) { - printk(KERN_ERR "microcode: too much data (max %ld pages)\n", num_physpages); - return -EINVAL; - } - - get_online_cpus(); - mutex_lock(µcode_mutex); - - user_buffer = (void __user *) buf; - user_buffer_size = (int) len; - - ret = do_microcode_update(); - if (!ret) - ret = (ssize_t)len; - - mutex_unlock(µcode_mutex); - put_online_cpus(); - - return ret; -} - -static const struct file_operations microcode_fops = { - .owner = THIS_MODULE, - .write = microcode_write, - .open = microcode_open, -}; - -static struct miscdevice microcode_dev = { - .minor = MICROCODE_MINOR, - .name = "microcode", - .fops = µcode_fops, -}; - -static int __init microcode_dev_init (void) -{ - int error; - - error = misc_register(µcode_dev); - if (error) { - printk(KERN_ERR - "microcode: can't misc_register on minor=%d\n", - MICROCODE_MINOR); - return error; - } - - return 0; -} - -static void microcode_dev_exit (void) -{ - misc_deregister(µcode_dev); -} - -MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); -#else -#define microcode_dev_init() 0 -#define microcode_dev_exit() do { } while(0) #endif static long get_next_ucode_from_buffer(void **mc, const u8 *buf, @@ -515,9 +402,9 @@ static long get_next_ucode_from_buffer(void **mc, const u8 *buf, } /* fake device for request_firmware */ -static struct platform_device *microcode_pdev; +extern struct platform_device *microcode_pdev; -static int cpu_request_microcode(int cpu) +int cpu_request_microcode(int cpu) { char name[30]; struct cpuinfo_x86 *c = &cpu_data(cpu); @@ -530,7 +417,7 @@ static int cpu_request_microcode(int cpu) /* We should bind the task to the CPU */ BUG_ON(cpu != raw_smp_processor_id()); - sprintf(name,"intel-ucode/%02x-%02x-%02x", + sprintf(name, "intel-ucode/%02x-%02x-%02x", c->x86, c->x86_model, c->x86_mask); error = request_firmware(&firmware, name, µcode_pdev->dev); if (error) { @@ -544,7 +431,7 @@ static int cpu_request_microcode(int cpu) error = microcode_sanity_check(mc); if (error) break; - error = get_maching_microcode(mc, cpu); + error = get_matching_microcode(mc, cpu); if (error < 0) break; /* @@ -566,7 +453,7 @@ static int cpu_request_microcode(int cpu) return error; } -static int apply_microcode_check_cpu(int cpu) +int apply_microcode_check_cpu(int cpu) { struct cpuinfo_x86 *c = &cpu_data(cpu); struct ucode_cpu_info *uci = ucode_cpu_info + cpu; @@ -615,241 +502,13 @@ static int apply_microcode_check_cpu(int cpu) return err; } -static void microcode_init_cpu(int cpu, int resume) -{ - cpumask_t old; - cpumask_of_cpu_ptr(newmask, cpu); - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - old = current->cpus_allowed; - - set_cpus_allowed_ptr(current, newmask); - mutex_lock(µcode_mutex); - collect_cpu_info(cpu); - if (uci->valid && system_state == SYSTEM_RUNNING && !resume) - cpu_request_microcode(cpu); - mutex_unlock(µcode_mutex); - set_cpus_allowed_ptr(current, &old); -} - -static void microcode_fini_cpu(int cpu) +void microcode_fini_cpu(int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; mutex_lock(µcode_mutex); uci->valid = 0; - vfree(uci->mc); + kfree(uci->mc); uci->mc = NULL; mutex_unlock(µcode_mutex); } - -static ssize_t reload_store(struct sys_device *dev, - struct sysdev_attribute *attr, - const char *buf, size_t sz) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; - char *end; - unsigned long val = simple_strtoul(buf, &end, 0); - int err = 0; - int cpu = dev->id; - - if (end == buf) - return -EINVAL; - if (val == 1) { - cpumask_t old; - cpumask_of_cpu_ptr(newmask, cpu); - - old = current->cpus_allowed; - - get_online_cpus(); - set_cpus_allowed_ptr(current, newmask); - - mutex_lock(µcode_mutex); - if (uci->valid) - err = cpu_request_microcode(cpu); - mutex_unlock(µcode_mutex); - put_online_cpus(); - set_cpus_allowed_ptr(current, &old); - } - if (err) - return err; - return sz; -} - -static ssize_t version_show(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; - - return sprintf(buf, "0x%x\n", uci->rev); -} - -static ssize_t pf_show(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; - - return sprintf(buf, "0x%x\n", uci->pf); -} - -static SYSDEV_ATTR(reload, 0200, NULL, reload_store); -static SYSDEV_ATTR(version, 0400, version_show, NULL); -static SYSDEV_ATTR(processor_flags, 0400, pf_show, NULL); - -static struct attribute *mc_default_attrs[] = { - &attr_reload.attr, - &attr_version.attr, - &attr_processor_flags.attr, - NULL -}; - -static struct attribute_group mc_attr_group = { - .attrs = mc_default_attrs, - .name = "microcode", -}; - -static int __mc_sysdev_add(struct sys_device *sys_dev, int resume) -{ - int err, cpu = sys_dev->id; - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - if (!cpu_online(cpu)) - return 0; - - pr_debug("microcode: CPU%d added\n", cpu); - memset(uci, 0, sizeof(*uci)); - - err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group); - if (err) - return err; - - microcode_init_cpu(cpu, resume); - - return 0; -} - -static int mc_sysdev_add(struct sys_device *sys_dev) -{ - return __mc_sysdev_add(sys_dev, 0); -} - -static int mc_sysdev_remove(struct sys_device *sys_dev) -{ - int cpu = sys_dev->id; - - if (!cpu_online(cpu)) - return 0; - - pr_debug("microcode: CPU%d removed\n", cpu); - microcode_fini_cpu(cpu); - sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); - return 0; -} - -static int mc_sysdev_resume(struct sys_device *dev) -{ - int cpu = dev->id; - - if (!cpu_online(cpu)) - return 0; - pr_debug("microcode: CPU%d resumed\n", cpu); - /* only CPU 0 will apply ucode here */ - apply_microcode(0); - return 0; -} - -static struct sysdev_driver mc_sysdev_driver = { - .add = mc_sysdev_add, - .remove = mc_sysdev_remove, - .resume = mc_sysdev_resume, -}; - -static __cpuinit int -mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) -{ - unsigned int cpu = (unsigned long)hcpu; - struct sys_device *sys_dev; - - sys_dev = get_cpu_sysdev(cpu); - switch (action) { - case CPU_UP_CANCELED_FROZEN: - /* The CPU refused to come up during a system resume */ - microcode_fini_cpu(cpu); - break; - case CPU_ONLINE: - case CPU_DOWN_FAILED: - mc_sysdev_add(sys_dev); - break; - case CPU_ONLINE_FROZEN: - /* System-wide resume is in progress, try to apply microcode */ - if (apply_microcode_check_cpu(cpu)) { - /* The application of microcode failed */ - microcode_fini_cpu(cpu); - __mc_sysdev_add(sys_dev, 1); - break; - } - case CPU_DOWN_FAILED_FROZEN: - if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group)) - printk(KERN_ERR "microcode: Failed to create the sysfs " - "group for CPU%d\n", cpu); - break; - case CPU_DOWN_PREPARE: - mc_sysdev_remove(sys_dev); - break; - case CPU_DOWN_PREPARE_FROZEN: - /* Suspend is in progress, only remove the interface */ - sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); - break; - } - return NOTIFY_OK; -} - -static struct notifier_block __refdata mc_cpu_notifier = { - .notifier_call = mc_cpu_callback, -}; - -static int __init microcode_init (void) -{ - int error; - - printk(KERN_INFO - "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " \n"); - - error = microcode_dev_init(); - if (error) - return error; - microcode_pdev = platform_device_register_simple("microcode", -1, - NULL, 0); - if (IS_ERR(microcode_pdev)) { - microcode_dev_exit(); - return PTR_ERR(microcode_pdev); - } - - get_online_cpus(); - error = sysdev_driver_register(&cpu_sysdev_class, &mc_sysdev_driver); - put_online_cpus(); - if (error) { - microcode_dev_exit(); - platform_device_unregister(microcode_pdev); - return error; - } - - register_hotcpu_notifier(&mc_cpu_notifier); - return 0; -} - -static void __exit microcode_exit (void) -{ - microcode_dev_exit(); - - unregister_hotcpu_notifier(&mc_cpu_notifier); - - get_online_cpus(); - sysdev_driver_unregister(&cpu_sysdev_class, &mc_sysdev_driver); - put_online_cpus(); - - platform_device_unregister(microcode_pdev); -} - -module_init(microcode_init) -module_exit(microcode_exit) -- cgit v0.10.2 From d4ee36686853d5714437c4409f17ad42bfaf4211 Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Mon, 28 Jul 2008 18:44:18 +0200 Subject: x86: structure declaration renaming Renamed common structures to vendor specific naming scheme so other vendors will be able to use the same naming convention. Signed-off-by: Peter Oruba Cc: Tigran Aivazian Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index eded0a1..ca9861b 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c @@ -100,17 +100,19 @@ MODULE_AUTHOR("Tigran Aivazian "); MODULE_LICENSE("GPL"); #define DEFAULT_UCODE_DATASIZE (2000) /* 2000 bytes */ -#define MC_HEADER_SIZE (sizeof(struct microcode_header)) /* 48 bytes */ +#define MC_HEADER_SIZE (sizeof(struct microcode_header_intel)) /* 48 bytes */ #define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) /* 2048 bytes */ #define EXT_HEADER_SIZE (sizeof(struct extended_sigtable)) /* 20 bytes */ #define EXT_SIGNATURE_SIZE (sizeof(struct extended_signature)) /* 12 bytes */ #define DWSIZE (sizeof(u32)) #define get_totalsize(mc) \ - (((struct microcode *)mc)->hdr.totalsize ? \ - ((struct microcode *)mc)->hdr.totalsize : DEFAULT_UCODE_TOTALSIZE) + (((struct microcode_intel *)mc)->hdr.totalsize ? \ + ((struct microcode_intel *)mc)->hdr.totalsize : \ + DEFAULT_UCODE_TOTALSIZE) + #define get_datasize(mc) \ - (((struct microcode *)mc)->hdr.datasize ? \ - ((struct microcode *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE) + (((struct microcode_intel *)mc)->hdr.datasize ? \ + ((struct microcode_intel *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE) #define sigmatch(s1, s2, p1, p2) \ (((s1) == (s2)) && (((p1) & (p2)) || (((p1) == 0) && ((p2) == 0)))) @@ -134,7 +136,7 @@ void collect_cpu_info(int cpu_num) /* We should bind the task to the CPU */ BUG_ON(raw_smp_processor_id() != cpu_num); uci->pf = uci->rev = 0; - uci->mc = NULL; + uci->mc.mc_intel = NULL; uci->valid = 1; if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || @@ -163,7 +165,7 @@ void collect_cpu_info(int cpu_num) } static inline int microcode_update_match(int cpu_num, - struct microcode_header *mc_header, int sig, int pf) + struct microcode_header_intel *mc_header, int sig, int pf) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; @@ -175,7 +177,7 @@ static inline int microcode_update_match(int cpu_num, int microcode_sanity_check(void *mc) { - struct microcode_header *mc_header = mc; + struct microcode_header_intel *mc_header = mc; struct extended_sigtable *ext_header = NULL; struct extended_signature *ext_sig; unsigned long total_size, data_size, ext_table_size; @@ -260,7 +262,7 @@ int microcode_sanity_check(void *mc) int get_matching_microcode(void *mc, int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - struct microcode_header *mc_header = mc; + struct microcode_header_intel *mc_header = mc; struct extended_sigtable *ext_header; unsigned long total_size = get_totalsize(mc_header); int ext_sigcount, i; @@ -294,10 +296,10 @@ find: } /* free previous update file */ - vfree(uci->mc); + vfree(uci->mc.mc_intel); memcpy(new_mc, mc, total_size); - uci->mc = new_mc; + uci->mc.mc_intel = new_mc; return 1; } @@ -311,7 +313,7 @@ void apply_microcode(int cpu) /* We should bind the task to the CPU */ BUG_ON(cpu_num != cpu); - if (uci->mc == NULL) + if (uci->mc.mc_intel == NULL) return; /* serialize access to the physical write to MSR 0x79 */ @@ -319,8 +321,8 @@ void apply_microcode(int cpu) /* write microcode via MSR 0x79 */ wrmsr(MSR_IA32_UCODE_WRITE, - (unsigned long) uci->mc->bits, - (unsigned long) uci->mc->bits >> 16 >> 16); + (unsigned long) uci->mc.mc_intel->bits, + (unsigned long) uci->mc.mc_intel->bits >> 16 >> 16); wrmsr(MSR_IA32_UCODE_REV, 0, 0); /* see notes above for revision 1.07. Apparent chip bug */ @@ -330,14 +332,14 @@ void apply_microcode(int cpu) rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); spin_unlock_irqrestore(µcode_update_lock, flags); - if (val[1] != uci->mc->hdr.rev) { + if (val[1] != uci->mc.mc_intel->hdr.rev) { printk(KERN_ERR "microcode: CPU%d update from revision " "0x%x to 0x%x failed\n", cpu_num, uci->rev, val[1]); return; } printk(KERN_INFO "microcode: CPU%d updated from revision " "0x%x to 0x%x, date = %08x \n", - cpu_num, uci->rev, val[1], uci->mc->hdr.date); + cpu_num, uci->rev, val[1], uci->mc.mc_intel->hdr.date); uci->rev = val[1]; } @@ -347,7 +349,7 @@ extern unsigned int user_buffer_size; /* it's size */ long get_next_ucode(void **mc, long offset) { - struct microcode_header mc_header; + struct microcode_header_intel mc_header; unsigned long total_size; /* No more data */ @@ -378,13 +380,13 @@ long get_next_ucode(void **mc, long offset) static long get_next_ucode_from_buffer(void **mc, const u8 *buf, unsigned long size, long offset) { - struct microcode_header *mc_header; + struct microcode_header_intel *mc_header; unsigned long total_size; /* No more data */ if (offset >= size) return 0; - mc_header = (struct microcode_header *)(buf + offset); + mc_header = (struct microcode_header_intel *)(buf + offset); total_size = get_totalsize(mc_header); if (offset + total_size > size) { @@ -463,7 +465,7 @@ int apply_microcode_check_cpu(int cpu) int err = 0; /* Check if the microcode is available */ - if (!uci->mc) + if (!uci->mc.mc_intel) return 0; old = current->cpus_allowed; @@ -508,7 +510,7 @@ void microcode_fini_cpu(int cpu) mutex_lock(µcode_mutex); uci->valid = 0; - kfree(uci->mc); - uci->mc = NULL; + kfree(uci->mc.mc_intel); + uci->mc.mc_intel = NULL; mutex_unlock(µcode_mutex); } diff --git a/include/asm-x86/microcode.h b/include/asm-x86/microcode.h index d34a1fc..ef77c6f 100644 --- a/include/asm-x86/microcode.h +++ b/include/asm-x86/microcode.h @@ -1,4 +1,4 @@ -struct microcode_header { +struct microcode_header_intel { unsigned int hdrver; unsigned int rev; unsigned int date; @@ -11,8 +11,8 @@ struct microcode_header { unsigned int reserved[3]; }; -struct microcode { - struct microcode_header hdr; +struct microcode_intel { + struct microcode_header_intel hdr; unsigned int bits[0]; }; @@ -35,5 +35,7 @@ struct ucode_cpu_info { unsigned int sig; unsigned int pf; unsigned int rev; - struct microcode *mc; + union { + struct microcode_intel *mc_intel; + } mc; }; -- cgit v0.10.2 From 9835fd4ad9ee5fc6b909df72aa3e3dba04415f4b Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Mon, 28 Jul 2008 18:44:19 +0200 Subject: x86: add AMD specific declarations Added AMD specific declarations to header file. Signed-off-by: Peter Oruba Cc: Tigran Aivazian Signed-off-by: Ingo Molnar diff --git a/include/asm-x86/microcode.h b/include/asm-x86/microcode.h index ef77c6f..4e94172 100644 --- a/include/asm-x86/microcode.h +++ b/include/asm-x86/microcode.h @@ -30,6 +30,35 @@ struct extended_sigtable { struct extended_signature sigs[0]; }; +struct equiv_cpu_entry { + unsigned int installed_cpu; + unsigned int fixed_errata_mask; + unsigned int fixed_errata_compare; + unsigned int equiv_cpu; +}; + +struct microcode_header_amd { + unsigned int data_code; + unsigned int patch_id; + unsigned char mc_patch_data_id[2]; + unsigned char mc_patch_data_len; + unsigned char init_flag; + unsigned int mc_patch_data_checksum; + unsigned int nb_dev_id; + unsigned int sb_dev_id; + unsigned char processor_rev_id[2]; + unsigned char nb_rev_id; + unsigned char sb_rev_id; + unsigned char bios_api_rev; + unsigned char reserved1[3]; + unsigned int match_reg[8]; +}; + +struct microcode_amd { + struct microcode_header_amd hdr; + unsigned int mpb[0]; +}; + struct ucode_cpu_info { int valid; unsigned int sig; @@ -37,5 +66,6 @@ struct ucode_cpu_info { unsigned int rev; union { struct microcode_intel *mc_intel; + struct microcode_amd *mc_amd; } mc; }; -- cgit v0.10.2 From 26bf7a48c33906cc3415a4492aa9ead7a75f1353 Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Mon, 28 Jul 2008 18:44:20 +0200 Subject: x86: first step of refactoring, introducing microcode_ops Refactoring with the goal of having one general module and separate vendor specific modules that hook into the general one. Microcode_ops is a function pointer structure in which vendor specific modules will enter all functions that differ between vendors and that need to be accessed from the general module. Signed-off-by: Peter Oruba Cc: Tigran Aivazian Signed-off-by: Ingo Molnar diff --git a/include/asm-x86/microcode.h b/include/asm-x86/microcode.h index 4e94172..9231c87 100644 --- a/include/asm-x86/microcode.h +++ b/include/asm-x86/microcode.h @@ -1,3 +1,16 @@ +struct microcode_ops { + long (*get_next_ucode)(void **mc, long offset); + long (*microcode_get_next_ucode)(void **mc, long offset); + int (*get_matching_microcode)(void *mc, int cpu); + int (*apply_microcode_check_cpu)(int cpu); + int (*microcode_sanity_check)(void *mc); + int (*cpu_request_microcode)(int cpu); + void (*collect_cpu_info)(int cpu_num); + void (*apply_microcode)(int cpu); + void (*microcode_fini_cpu)(int cpu); + void (*clear_patch)(void *data); +}; + struct microcode_header_intel { unsigned int hdrver; unsigned int rev; -- cgit v0.10.2 From 8d86f390d9bb5b39f0a315838d1616de6363e1b9 Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Mon, 28 Jul 2008 18:44:21 +0200 Subject: x86: major refactoring Refactored code by introducing a two-module solution. There is one general module in which vendor specific modules can hook into. However, that is exclusive, there is only one vendor specific module allowed at a time. A CPU vendor check makes sure only the correct module for the underlying system gets called. Functinally in terms of patch loading itself there are no changes. This refactoring provides a basis for future implementations of other vendors' patch loaders. Signed-off-by: Peter Oruba Cc: Tigran Aivazian Signed-off-by: Ingo Molnar diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index b6fa287..6b0b885c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -782,7 +782,7 @@ config X86_REBOOTFIXUPS Say N otherwise. config MICROCODE - tristate "/dev/cpu/microcode - Intel IA32 CPU microcode support" + tristate "/dev/cpu/microcode - microcode support" select FW_LOADER ---help--- If you say Y here, you will be able to update the microcode on @@ -791,14 +791,29 @@ config MICROCODE actual microcode binary data itself which is not shipped with the Linux kernel. - For latest news and information on obtaining all the required - ingredients for this driver, check: - . + This option selects the general module only, you need to select + at least one vendor specific module as well. To compile this driver as a module, choose M here: the module will be called microcode. -config MICROCODE_OLD_INTERFACE +config MICROCODE_INTEL + tristate "Intel microcode patch loading support" + depends on MICROCODE + default MICROCODE + select FW_LOADER + --help--- + This options enables microcode patch loading support for Intel + processors. + + For latest news and information on obtaining all the required + Intel ingredients for this driver, check: + . + + This driver is only available as a module: the module + will be called microcode_intel. + + config MICROCODE_OLD_INTERFACE def_bool y depends on MICROCODE diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index abb32ae..f2f9f6d 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -51,8 +51,8 @@ obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o obj-$(CONFIG_MCA) += mca_32.o obj-$(CONFIG_X86_MSR) += msr.o obj-$(CONFIG_X86_CPUID) += cpuid.o -obj-$(CONFIG_MICROCODE) += ucode.o -ucode-objs := microcode.o microcode_intel.o +obj-$(CONFIG_MICROCODE) += microcode.o +obj-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o obj-$(CONFIG_PCI) += early-quirks.o apm-y := apm_32.o obj-$(CONFIG_APM) += apm.o diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c index c1047d7..1e42e79 100644 --- a/arch/x86/kernel/microcode.c +++ b/arch/x86/kernel/microcode.c @@ -99,25 +99,22 @@ MODULE_DESCRIPTION("Microcode Update Driver"); MODULE_AUTHOR("Tigran Aivazian "); MODULE_LICENSE("GPL"); -#define MICROCODE_VERSION "1.14a" +#define MICROCODE_VERSION "2.00" -/* no concurrent ->write()s are allowed on /dev/cpu/microcode */ -DEFINE_MUTEX(microcode_mutex); +struct microcode_ops *microcode_ops; -struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; +/* no concurrent ->write()s are allowed on /dev/cpu/microcode */ +static DEFINE_MUTEX(microcode_mutex); +EXPORT_SYMBOL_GPL(microcode_mutex); -extern long get_next_ucode(void **mc, long offset); -extern int microcode_sanity_check(void *mc); -extern int get_matching_microcode(void *mc, int cpu); -extern void collect_cpu_info(int cpu_num); -extern int cpu_request_microcode(int cpu); -extern void microcode_fini_cpu(int cpu); -extern void apply_microcode(int cpu); -extern int apply_microcode_check_cpu(int cpu); +static struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; +EXPORT_SYMBOL_GPL(ucode_cpu_info); #ifdef CONFIG_MICROCODE_OLD_INTERFACE -void __user *user_buffer; /* user area microcode data buffer */ -unsigned int user_buffer_size; /* it's size */ +static void __user *user_buffer; /* user area microcode data buffer */ +EXPORT_SYMBOL_GPL(user_buffer); +static unsigned int user_buffer_size; /* it's size */ +EXPORT_SYMBOL_GPL(user_buffer_size); static int do_microcode_update (void) { @@ -130,8 +127,8 @@ static int do_microcode_update (void) old = current->cpus_allowed; - while ((cursor = get_next_ucode(&new_mc, cursor)) > 0) { - error = microcode_sanity_check(new_mc); + while ((cursor = microcode_ops->get_next_ucode(&new_mc, cursor)) > 0) { + error = microcode_ops->microcode_sanity_check(new_mc); if (error) goto out; /* @@ -145,11 +142,12 @@ static int do_microcode_update (void) continue; cpumask_of_cpu_ptr_next(newmask, cpu); set_cpus_allowed_ptr(current, newmask); - error = get_maching_microcode(new_mc, cpu); + error = microcode_ops->get_matching_microcode(new_mc, + cpu); if (error < 0) goto out; if (error == 1) - apply_microcode(cpu); + microcode_ops->apply_microcode(cpu); } vfree(new_mc); } @@ -232,7 +230,8 @@ MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); #endif /* fake device for request_firmware */ -struct platform_device *microcode_pdev; +static struct platform_device *microcode_pdev; +EXPORT_SYMBOL_GPL(microcode_pdev); static void microcode_init_cpu(int cpu, int resume) { @@ -244,9 +243,9 @@ static void microcode_init_cpu(int cpu, int resume) set_cpus_allowed_ptr(current, newmask); mutex_lock(µcode_mutex); - collect_cpu_info(cpu); + microcode_ops->collect_cpu_info(cpu); if (uci->valid && system_state == SYSTEM_RUNNING && !resume) - cpu_request_microcode(cpu); + microcode_ops->cpu_request_microcode(cpu); mutex_unlock(µcode_mutex); set_cpus_allowed_ptr(current, &old); } @@ -274,7 +273,7 @@ static ssize_t reload_store(struct sys_device *dev, mutex_lock(µcode_mutex); if (uci->valid) - err = cpu_request_microcode(cpu); + err = microcode_ops->cpu_request_microcode(cpu); mutex_unlock(µcode_mutex); put_online_cpus(); set_cpus_allowed_ptr(current, &old); @@ -349,7 +348,7 @@ static int mc_sysdev_remove(struct sys_device *sys_dev) return 0; pr_debug("microcode: CPU%d removed\n", cpu); - microcode_fini_cpu(cpu); + microcode_ops->microcode_fini_cpu(cpu); sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); return 0; } @@ -362,7 +361,7 @@ static int mc_sysdev_resume(struct sys_device *dev) return 0; pr_debug("microcode: CPU%d resumed\n", cpu); /* only CPU 0 will apply ucode here */ - apply_microcode(0); + microcode_ops->apply_microcode(0); return 0; } @@ -382,7 +381,7 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) switch (action) { case CPU_UP_CANCELED_FROZEN: /* The CPU refused to come up during a system resume */ - microcode_fini_cpu(cpu); + microcode_ops->microcode_fini_cpu(cpu); break; case CPU_ONLINE: case CPU_DOWN_FAILED: @@ -390,9 +389,9 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) break; case CPU_ONLINE_FROZEN: /* System-wide resume is in progress, try to apply microcode */ - if (apply_microcode_check_cpu(cpu)) { + if (microcode_ops->apply_microcode_check_cpu(cpu)) { /* The application of microcode failed */ - microcode_fini_cpu(cpu); + microcode_ops->microcode_fini_cpu(cpu); __mc_sysdev_add(sys_dev, 1); break; } @@ -416,12 +415,17 @@ static struct notifier_block __refdata mc_cpu_notifier = { .notifier_call = mc_cpu_callback, }; -static int __init microcode_init (void) +static int microcode_init(void *opaque, struct module *module) { + struct microcode_ops *ops = (struct microcode_ops *)opaque; int error; - printk(KERN_INFO - "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " \n"); + if (microcode_ops) { + printk(KERN_ERR "microcode: already loaded the other module\n"); + return -EEXIST; + } + + microcode_ops = ops; error = microcode_dev_init(); if (error) @@ -443,8 +447,15 @@ static int __init microcode_init (void) } register_hotcpu_notifier(&mc_cpu_notifier); + + printk(KERN_INFO + "Microcode Update Driver: v" MICROCODE_VERSION + " " + " \n"); + return 0; } +EXPORT_SYMBOL_GPL(microcode_init); static void __exit microcode_exit (void) { @@ -457,7 +468,10 @@ static void __exit microcode_exit (void) put_online_cpus(); platform_device_unregister(microcode_pdev); -} -module_init(microcode_init) -module_exit(microcode_exit) + microcode_ops = NULL; + + printk(KERN_INFO + "Microcode Update Driver: v" MICROCODE_VERSION " removed.\n"); +} +EXPORT_SYMBOL_GPL(microcode_exit); diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index ca9861b..831db53 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c @@ -127,7 +127,7 @@ extern struct mutex microcode_mutex; extern struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; -void collect_cpu_info(int cpu_num) +static void collect_cpu_info(int cpu_num) { struct cpuinfo_x86 *c = &cpu_data(cpu_num); struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; @@ -175,7 +175,7 @@ static inline int microcode_update_match(int cpu_num, return 1; } -int microcode_sanity_check(void *mc) +static int microcode_sanity_check(void *mc) { struct microcode_header_intel *mc_header = mc; struct extended_sigtable *ext_header = NULL; @@ -259,7 +259,7 @@ int microcode_sanity_check(void *mc) * return 1 - found update * return < 0 - error */ -int get_matching_microcode(void *mc, int cpu) +static int get_matching_microcode(void *mc, int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; struct microcode_header_intel *mc_header = mc; @@ -288,7 +288,8 @@ int get_matching_microcode(void *mc, int cpu) return 0; find: pr_debug("microcode: CPU%d found a matching microcode update with" - " version 0x%x (current=0x%x)\n", cpu, mc_header->rev, uci->rev); + " version 0x%x (current=0x%x)\n", + cpu, mc_header->rev, uci->rev); new_mc = vmalloc(total_size); if (!new_mc) { printk(KERN_ERR "microcode: error! Can not allocate memory\n"); @@ -303,7 +304,7 @@ find: return 1; } -void apply_microcode(int cpu) +static void apply_microcode(int cpu) { unsigned long flags; unsigned int val[2]; @@ -347,7 +348,7 @@ void apply_microcode(int cpu) extern void __user *user_buffer; /* user area microcode data buffer */ extern unsigned int user_buffer_size; /* it's size */ -long get_next_ucode(void **mc, long offset) +static long get_next_ucode(void **mc, long offset) { struct microcode_header_intel mc_header; unsigned long total_size; @@ -406,7 +407,7 @@ static long get_next_ucode_from_buffer(void **mc, const u8 *buf, /* fake device for request_firmware */ extern struct platform_device *microcode_pdev; -int cpu_request_microcode(int cpu) +static int cpu_request_microcode(int cpu) { char name[30]; struct cpuinfo_x86 *c = &cpu_data(cpu); @@ -455,7 +456,7 @@ int cpu_request_microcode(int cpu) return error; } -int apply_microcode_check_cpu(int cpu) +static int apply_microcode_check_cpu(int cpu) { struct cpuinfo_x86 *c = &cpu_data(cpu); struct ucode_cpu_info *uci = ucode_cpu_info + cpu; @@ -504,13 +505,42 @@ int apply_microcode_check_cpu(int cpu) return err; } -void microcode_fini_cpu(int cpu) +static void microcode_fini_cpu(int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; mutex_lock(µcode_mutex); uci->valid = 0; - kfree(uci->mc.mc_intel); + vfree(uci->mc.mc_intel); uci->mc.mc_intel = NULL; mutex_unlock(µcode_mutex); } + +static struct microcode_ops microcode_intel_ops = { + .get_next_ucode = get_next_ucode, + .get_matching_microcode = get_matching_microcode, + .microcode_sanity_check = microcode_sanity_check, + .apply_microcode_check_cpu = apply_microcode_check_cpu, + .cpu_request_microcode = cpu_request_microcode, + .collect_cpu_info = collect_cpu_info, + .apply_microcode = apply_microcode, + .microcode_fini_cpu = microcode_fini_cpu, +}; + +static int __init microcode_intel_module_init(void) +{ + struct cpuinfo_x86 *c = &cpu_data(get_cpu()); + + if (c->x86_vendor == X86_VENDOR_INTEL) + return microcode_init(µcode_intel_ops, THIS_MODULE); + else + return -ENODEV; +} + +static void __exit microcode_intel_module_exit(void) +{ + microcode_exit(); +} + +module_init(microcode_intel_module_init) +module_exit(microcode_intel_module_exit) diff --git a/include/asm-x86/microcode.h b/include/asm-x86/microcode.h index 9231c87..18b2aee 100644 --- a/include/asm-x86/microcode.h +++ b/include/asm-x86/microcode.h @@ -1,3 +1,6 @@ +extern int microcode_init(void *opaque, struct module *module); +extern void microcode_exit(void); + struct microcode_ops { long (*get_next_ucode)(void **mc, long offset); long (*microcode_get_next_ucode)(void **mc, long offset); -- cgit v0.10.2 From 80cc9f1020f49c9e5b50898c102fd444de70a0a3 Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Mon, 28 Jul 2008 18:44:22 +0200 Subject: x86: AMD microcode patch loading support This patch introduces microcode patch loading for AMD processors. It is based on previous corresponding work for Intel processors. It hooks into the general patch loading module. Main difference is that a container file format is used to hold all patch data for multiple processors as well as an equivalent CPU table, which comes seperately, as opposed to Intel's microcode patching solution. Kconfig and Makefile have been changed provice config and build option for new source file. Signed-off-by: Peter Oruba Cc: Tigran Aivazian Signed-off-by: Ingo Molnar diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 6b0b885c..7bb4617 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -786,10 +786,12 @@ config MICROCODE select FW_LOADER ---help--- If you say Y here, you will be able to update the microcode on - Intel processors in the IA32 family, e.g. Pentium Pro, Pentium II, - Pentium III, Pentium 4, Xeon etc. You will obviously need the - actual microcode binary data itself which is not shipped with the - Linux kernel. + certain Intel and AMD processors. The Intel support is for the + IA32 family, e.g. Pentium Pro, Pentium II, Pentium III, + Pentium 4, Xeon etc. The AMD support is for family 0x10 and + 0x11 processors, e.g. Opteron, Phenom and Turion 64 Ultra. + You will obviously need the actual microcode binary data itself + which is not shipped with the Linux kernel. This option selects the general module only, you need to select at least one vendor specific module as well. @@ -813,6 +815,17 @@ config MICROCODE_INTEL This driver is only available as a module: the module will be called microcode_intel. +config MICROCODE_AMD + tristate "AMD microcode patch loading support" + depends on MICROCODE + select FW_LOADER + --help--- + If you select this option, microcode patch loading support for AMD + processors will be enabled. + + This driver is only available as a module: the module + will be called microcode_amd. + config MICROCODE_OLD_INTERFACE def_bool y depends on MICROCODE diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index f2f9f6d..be454f34 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -53,6 +53,7 @@ obj-$(CONFIG_X86_MSR) += msr.o obj-$(CONFIG_X86_CPUID) += cpuid.o obj-$(CONFIG_MICROCODE) += microcode.o obj-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o +obj-$(CONFIG_MICROCODE_AMD) += microcode_amd.o obj-$(CONFIG_PCI) += early-quirks.o apm-y := apm_32.o obj-$(CONFIG_APM) += apm.o diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c new file mode 100644 index 0000000..db199e3 --- /dev/null +++ b/arch/x86/kernel/microcode_amd.c @@ -0,0 +1,521 @@ +/* + * AMD CPU Microcode Update Driver for Linux + * Copyright (C) 2008 Advanced Micro Devices Inc. + * + * Author: Peter Oruba + * + * Based on work by: + * Tigran Aivazian + * + * This driver allows to upgrade microcode on AMD + * family 0x10 and 0x11 processors. + * + * Licensed unter the terms of the GNU General Public + * License version 2. See file COPYING for details. +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +MODULE_DESCRIPTION("AMD Microcode Update Driver"); +MODULE_AUTHOR("Peter Oruba "); +MODULE_LICENSE("GPLv2"); + +#define UCODE_MAGIC 0x00414d44 +#define UCODE_EQUIV_CPU_TABLE_TYPE 0x00000000 +#define UCODE_UCODE_TYPE 0x00000001 + +#define UCODE_MAX_SIZE (2048) +#define DEFAULT_UCODE_DATASIZE (896) /* 896 bytes */ +#define MC_HEADER_SIZE (sizeof(struct microcode_header_amd)) /* 64 bytes */ +#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) /* 960 bytes */ +#define DWSIZE (sizeof(u32)) +/* For now we support a fixed ucode total size only */ +#define get_totalsize(mc) \ + ((((struct microcode_amd *)mc)->hdr.mc_patch_data_len * 28) \ + + MC_HEADER_SIZE) + +extern int microcode_init(void *opaque, struct module *module); +extern void microcode_exit(void); + +/* serialize access to the physical write */ +static DEFINE_SPINLOCK(microcode_update_lock); + +/* no concurrent ->write()s are allowed on /dev/cpu/microcode */ +extern struct mutex (microcode_mutex); + +struct equiv_cpu_entry *equiv_cpu_table; + +extern struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; + +static void collect_cpu_info_amd(int cpu) +{ + struct cpuinfo_x86 *c = &cpu_data(cpu); + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + /* We should bind the task to the CPU */ + BUG_ON(raw_smp_processor_id() != cpu); + uci->rev = 0; + uci->pf = 0; + uci->mc.mc_amd = NULL; + uci->valid = 1; + + if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) { + printk(KERN_ERR "microcode: CPU%d not a capable AMD processor\n", + cpu); + uci->valid = 0; + return; + } + + asm volatile("movl %1, %%ecx; rdmsr" + : "=a" (uci->rev) + : "i" (0x0000008B) : "ecx"); + + printk(KERN_INFO "microcode: collect_cpu_info_amd : patch_id=0x%x\n", + uci->rev); +} + +static int get_matching_microcode_amd(void *mc, int cpu) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + struct microcode_header_amd *mc_header = mc; + unsigned long total_size = get_totalsize(mc_header); + void *new_mc; + struct pci_dev *nb_pci_dev, *sb_pci_dev; + unsigned int current_cpu_id; + unsigned int equiv_cpu_id = 0x00; + unsigned int i = 0; + + /* We should bind the task to the CPU */ + BUG_ON(cpu != raw_smp_processor_id()); + + /* This is a tricky part. We might be called from a write operation */ + /* to the device file instead of the usual process of firmware */ + /* loading. This routine needs to be able to distinguish both */ +/* cases. This is done by checking if there alread is a equivalent */ + /* CPU table installed. If not, we're written through */ + /* /dev/cpu/microcode. */ +/* Since we ignore all checks. The error case in which going through */ +/* firmware loading and that table is not loaded has already been */ + /* checked earlier. */ + if (equiv_cpu_table == NULL) { + printk(KERN_INFO "microcode: CPU%d microcode update with " + "version 0x%x (current=0x%x)\n", + cpu, mc_header->patch_id, uci->rev); + goto out; + } + + current_cpu_id = cpuid_eax(0x00000001); + + while (equiv_cpu_table[i].installed_cpu != 0) { + if (current_cpu_id == equiv_cpu_table[i].installed_cpu) { + equiv_cpu_id = equiv_cpu_table[i].equiv_cpu; + break; + } + i++; + } + + if (!equiv_cpu_id) { + printk(KERN_ERR "microcode: CPU%d cpu_id " + "not found in equivalent cpu table \n", cpu); + return 0; + } + + if ((mc_header->processor_rev_id[0]) != (equiv_cpu_id & 0xff)) { + printk(KERN_ERR + "microcode: CPU%d patch does not match " + "(patch is %x, cpu extended is %x) \n", + cpu, mc_header->processor_rev_id[0], + (equiv_cpu_id & 0xff)); + return 0; + } + + if ((mc_header->processor_rev_id[1]) != ((equiv_cpu_id >> 16) & 0xff)) { + printk(KERN_ERR "microcode: CPU%d patch does not match " + "(patch is %x, cpu base id is %x) \n", + cpu, mc_header->processor_rev_id[1], + ((equiv_cpu_id >> 16) & 0xff)); + + return 0; + } + + /* ucode may be northbridge specific */ + if (mc_header->nb_dev_id) { + nb_pci_dev = pci_get_device(PCI_VENDOR_ID_AMD, + (mc_header->nb_dev_id & 0xff), + NULL); + if ((!nb_pci_dev) || + (mc_header->nb_rev_id != nb_pci_dev->revision)) { + printk(KERN_ERR "microcode: CPU%d NB mismatch \n", cpu); + pci_dev_put(nb_pci_dev); + return 0; + } + pci_dev_put(nb_pci_dev); + } + + /* ucode may be southbridge specific */ + if (mc_header->sb_dev_id) { + sb_pci_dev = pci_get_device(PCI_VENDOR_ID_AMD, + (mc_header->sb_dev_id & 0xff), + NULL); + if ((!sb_pci_dev) || + (mc_header->sb_rev_id != sb_pci_dev->revision)) { + printk(KERN_ERR "microcode: CPU%d SB mismatch \n", cpu); + pci_dev_put(sb_pci_dev); + return 0; + } + pci_dev_put(sb_pci_dev); + } + + if (mc_header->patch_id <= uci->rev) + return 0; + + printk(KERN_INFO "microcode: CPU%d found a matching microcode " + "update with version 0x%x (current=0x%x)\n", + cpu, mc_header->patch_id, uci->rev); + +out: + new_mc = vmalloc(UCODE_MAX_SIZE); + if (!new_mc) { + printk(KERN_ERR "microcode: error, can't allocate memory\n"); + return -ENOMEM; + } + memset(new_mc, 0, UCODE_MAX_SIZE); + + /* free previous update file */ + vfree(uci->mc.mc_amd); + + memcpy(new_mc, mc, total_size); + + uci->mc.mc_amd = new_mc; + return 1; +} + +static void apply_microcode_amd(int cpu) +{ + unsigned long flags; + unsigned int eax, edx; + unsigned int rev; + int cpu_num = raw_smp_processor_id(); + struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; + + /* We should bind the task to the CPU */ + BUG_ON(cpu_num != cpu); + + if (uci->mc.mc_amd == NULL) + return; + + spin_lock_irqsave(µcode_update_lock, flags); + + edx = (unsigned int)(((unsigned long) + &(uci->mc.mc_amd->hdr.data_code)) >> 32); + eax = (unsigned int)(((unsigned long) + &(uci->mc.mc_amd->hdr.data_code)) & 0xffffffffL); + + asm volatile("movl %0, %%ecx; wrmsr" : + : "i" (0xc0010020), "a" (eax), "d" (edx) : "ecx"); + + /* get patch id after patching */ + asm volatile("movl %1, %%ecx; rdmsr" + : "=a" (rev) + : "i" (0x0000008B) : "ecx"); + + spin_unlock_irqrestore(µcode_update_lock, flags); + + /* check current patch id and patch's id for match */ + if (rev != uci->mc.mc_amd->hdr.patch_id) { + printk(KERN_ERR "microcode: CPU%d update from revision " + "0x%x to 0x%x failed\n", cpu_num, + uci->mc.mc_amd->hdr.patch_id, rev); + return; + } + + printk(KERN_INFO "microcode: CPU%d updated from revision " + "0x%x to 0x%x \n", + cpu_num, uci->rev, uci->mc.mc_amd->hdr.patch_id); + + uci->rev = rev; +} + +#ifdef CONFIG_MICROCODE_OLD_INTERFACE +extern void __user *user_buffer; /* user area microcode data buffer */ +extern unsigned int user_buffer_size; /* it's size */ + +static long get_next_ucode_amd(void **mc, long offset) +{ + struct microcode_header_amd mc_header; + unsigned long total_size; + + /* No more data */ + if (offset >= user_buffer_size) + return 0; + if (copy_from_user(&mc_header, user_buffer + offset, MC_HEADER_SIZE)) { + printk(KERN_ERR "microcode: error! Can not read user data\n"); + return -EFAULT; + } + total_size = get_totalsize(&mc_header); + if (offset + total_size > user_buffer_size) { + printk(KERN_ERR "microcode: error! Bad total size in microcode " + "data file\n"); + return -EINVAL; + } + *mc = vmalloc(UCODE_MAX_SIZE); + if (!*mc) + return -ENOMEM; + memset(*mc, 0, UCODE_MAX_SIZE); + + if (copy_from_user(*mc, user_buffer + offset, total_size)) { + printk(KERN_ERR "microcode: error! Can not read user data\n"); + vfree(*mc); + return -EFAULT; + } + return offset + total_size; +} +#else +#define get_next_ucode_amd() NULL +#endif + +static long get_next_ucode_from_buffer_amd(void **mc, void *buf, + unsigned long size, long offset) +{ + struct microcode_header_amd *mc_header; + unsigned long total_size; + unsigned char *buf_pos = buf; + + /* No more data */ + if (offset >= size) + return 0; + + if (buf_pos[offset] != UCODE_UCODE_TYPE) { + printk(KERN_ERR "microcode: error! " + "Wrong microcode payload type field\n"); + return -EINVAL; + } + + mc_header = (struct microcode_header_amd *)(&buf_pos[offset+8]); + + total_size = (unsigned long) (buf_pos[offset+4] + + (buf_pos[offset+5] << 8)); + + printk(KERN_INFO "microcode: size %lu, total_size %lu, offset %ld\n", + size, total_size, offset); + + if (offset + total_size > size) { + printk(KERN_ERR "microcode: error! Bad data in microcode data file\n"); + return -EINVAL; + } + + *mc = vmalloc(UCODE_MAX_SIZE); + if (!*mc) { + printk(KERN_ERR "microcode: error! " + "Can not allocate memory for microcode patch\n"); + return -ENOMEM; + } + + memset(*mc, 0, UCODE_MAX_SIZE); + memcpy(*mc, buf + offset + 8, total_size); + + return offset + total_size + 8; +} + +static long install_equiv_cpu_table(void *buf, unsigned long size, long offset) +{ + unsigned int *buf_pos = buf; + + /* No more data */ + if (offset >= size) + return 0; + + if (buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE) { + printk(KERN_ERR "microcode: error! " + "Wrong microcode equivalnet cpu table type field\n"); + return 0; + } + + if (size == 0) { + printk(KERN_ERR "microcode: error! " + "Wrong microcode equivalnet cpu table length\n"); + return 0; + } + + equiv_cpu_table = (struct equiv_cpu_entry *) vmalloc(size); + if (!equiv_cpu_table) { + printk(KERN_ERR "microcode: error, can't allocate memory for equiv CPU table\n"); + return 0; + } + + memset(equiv_cpu_table, 0, size); + memcpy(equiv_cpu_table, &buf_pos[3], size); + + return size + 12; /* add header length */ +} + +/* fake device for request_firmware */ +extern struct platform_device *microcode_pdev; + +static int cpu_request_microcode_amd(int cpu) +{ + char name[30]; + const struct firmware *firmware; + void *buf; + unsigned int *buf_pos; + unsigned long size; + long offset = 0; + int error; + void *mc; + + /* We should bind the task to the CPU */ + BUG_ON(cpu != raw_smp_processor_id()); + + sprintf(name, "amd-ucode/microcode_amd.bin"); + error = request_firmware(&firmware, "amd-ucode/microcode_amd.bin", + µcode_pdev->dev); + if (error) { + printk(KERN_ERR "microcode: ucode data file %s load failed\n", + name); + return error; + } + + buf_pos = buf = firmware->data; + size = firmware->size; + + if (buf_pos[0] != UCODE_MAGIC) { + printk(KERN_ERR "microcode: error! Wrong microcode patch file magic\n"); + return -EINVAL; + } + + offset = install_equiv_cpu_table(buf, buf_pos[2], offset); + + if (!offset) { + printk(KERN_ERR "microcode: installing equivalent cpu table failed\n"); + return -EINVAL; + } + + while ((offset = + get_next_ucode_from_buffer_amd(&mc, buf, size, offset)) > 0) { + error = get_matching_microcode_amd(mc, cpu); + if (error < 0) + break; + /* + * It's possible the data file has multiple matching ucode, + * lets keep searching till the latest version + */ + if (error == 1) { + apply_microcode_amd(cpu); + error = 0; + } + vfree(mc); + } + if (offset > 0) { + vfree(mc); + vfree(equiv_cpu_table); + equiv_cpu_table = NULL; + } + if (offset < 0) + error = offset; + release_firmware(firmware); + + return error; +} + +static int apply_microcode_check_cpu_amd(int cpu) +{ + struct cpuinfo_x86 *c = &cpu_data(cpu); + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + unsigned int rev; + cpumask_t old; + cpumask_of_cpu_ptr(newmask, cpu); + int err = 0; + + /* Check if the microcode is available */ + if (!uci->mc.mc_amd) + return 0; + + old = current->cpus_allowed; + set_cpus_allowed(current, newmask); + + /* Check if the microcode we have in memory matches the CPU */ + if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 16) + err = -EINVAL; + + if (!err) { + asm volatile("movl %1, %%ecx; rdmsr" + : "=a" (rev) + : "i" (0x0000008B) : "ecx"); + + if (uci->rev != rev) + err = -EINVAL; + } + + if (!err) + apply_microcode_amd(cpu); + else + printk(KERN_ERR "microcode: Could not apply microcode to CPU%d:" + " rev=0x%x\n", + cpu, uci->rev); + + set_cpus_allowed(current, old); + return err; +} + +static void microcode_fini_cpu_amd(int cpu) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + mutex_lock(µcode_mutex); + uci->valid = 0; + vfree(uci->mc.mc_amd); + uci->mc.mc_amd = NULL; + mutex_unlock(µcode_mutex); +} + +static struct microcode_ops microcode_amd_ops = { + .get_next_ucode = get_next_ucode_amd, + .get_matching_microcode = get_matching_microcode_amd, + .microcode_sanity_check = NULL, + .apply_microcode_check_cpu = apply_microcode_check_cpu_amd, + .cpu_request_microcode = cpu_request_microcode_amd, + .collect_cpu_info = collect_cpu_info_amd, + .apply_microcode = apply_microcode_amd, + .microcode_fini_cpu = microcode_fini_cpu_amd, +}; + +static int __init microcode_amd_module_init(void) +{ + struct cpuinfo_x86 *c = &cpu_data(get_cpu()); + + equiv_cpu_table = NULL; + if (c->x86_vendor == X86_VENDOR_AMD) + return microcode_init(µcode_amd_ops, THIS_MODULE); + else + return -ENODEV; +} + +static void __exit microcode_amd_module_exit(void) +{ + microcode_exit(); +} + +module_init(microcode_amd_module_init) +module_exit(microcode_amd_module_exit) -- cgit v0.10.2 From 45b1e23eca1c53fa79a611a2bc8c93697ede6c97 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 29 Jul 2008 09:42:17 +0200 Subject: x86, microcode support: fix build error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix: arch/x86/kernel/microcode.c:412: error: static declaration of ‘microcode_init’ follows non-static declaration include/asm/microcode.h:1: error: previous declaration of ‘microcode_init’ was here arch/x86/kernel/microcode.c:454: error: static declaration of ‘microcode_exit’ follows non-static declaration include/asm/microcode.h:2: error: previous declaration of ‘microcode_exit’ was here Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c index 1e42e79..9a88184 100644 --- a/arch/x86/kernel/microcode.c +++ b/arch/x86/kernel/microcode.c @@ -415,7 +415,7 @@ static struct notifier_block __refdata mc_cpu_notifier = { .notifier_call = mc_cpu_callback, }; -static int microcode_init(void *opaque, struct module *module) +int microcode_init(void *opaque, struct module *module) { struct microcode_ops *ops = (struct microcode_ops *)opaque; int error; @@ -457,7 +457,7 @@ static int microcode_init(void *opaque, struct module *module) } EXPORT_SYMBOL_GPL(microcode_init); -static void __exit microcode_exit (void) +void __exit microcode_exit (void) { microcode_dev_exit(); -- cgit v0.10.2 From 224e946b81166d732f3e9b414cb69612072fb500 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 29 Jul 2008 09:52:55 +0200 Subject: x86, microcode: fix symbol exports fix tons of build errors: arch/x86/kernel/built-in.o: In function `microcode_fini_cpu': microcode_intel.c:(.text+0x11598): undefined reference to `microcode_mutex' microcode_intel.c:(.text+0x115a4): undefined reference to `ucode_cpu_info' microcode_intel.c:(.text+0x115ae): undefined reference to `ucode_cpu_info' microcode_intel.c:(.text+0x115bc): undefined reference to `microcode_mutex' [...] Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c index 9a88184..758b958 100644 --- a/arch/x86/kernel/microcode.c +++ b/arch/x86/kernel/microcode.c @@ -104,16 +104,16 @@ MODULE_LICENSE("GPL"); struct microcode_ops *microcode_ops; /* no concurrent ->write()s are allowed on /dev/cpu/microcode */ -static DEFINE_MUTEX(microcode_mutex); +DEFINE_MUTEX(microcode_mutex); EXPORT_SYMBOL_GPL(microcode_mutex); -static struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; +struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; EXPORT_SYMBOL_GPL(ucode_cpu_info); #ifdef CONFIG_MICROCODE_OLD_INTERFACE -static void __user *user_buffer; /* user area microcode data buffer */ +void __user *user_buffer; /* user area microcode data buffer */ EXPORT_SYMBOL_GPL(user_buffer); -static unsigned int user_buffer_size; /* it's size */ +unsigned int user_buffer_size; /* it's size */ EXPORT_SYMBOL_GPL(user_buffer_size); static int do_microcode_update (void) @@ -230,7 +230,7 @@ MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); #endif /* fake device for request_firmware */ -static struct platform_device *microcode_pdev; +struct platform_device *microcode_pdev; EXPORT_SYMBOL_GPL(microcode_pdev); static void microcode_init_cpu(int cpu, int resume) -- cgit v0.10.2 From 5d7b605245b1aa1a9cd6549b1f57d69273eb0c37 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 29 Jul 2008 10:07:36 +0200 Subject: x86, microcode: fix module license string fix: FATAL: modpost: GPL-incompatible module microcode_amd.ko uses GPL-only symbol 'set_cpus_allowed_ptr' Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index db199e3..fd9e68e 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -40,7 +40,7 @@ MODULE_DESCRIPTION("AMD Microcode Update Driver"); MODULE_AUTHOR("Peter Oruba "); -MODULE_LICENSE("GPLv2"); +MODULE_LICENSE("GPL v2"); #define UCODE_MAGIC 0x00414d44 #define UCODE_EQUIV_CPU_TABLE_TYPE 0x00000000 -- cgit v0.10.2 From 6902c0bead4ce266226fc0c5b3828b850bdc884a Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 6 Jun 2008 01:33:22 -0400 Subject: Input: gameport - make gameport_register_driver() return errors Perform actual driver registration right in gameport_register_driver() instead of offloading it to kgameportd and return proper error code to callers if driver registration fails. Note that driver <-> port matching is still done by kgameportd. Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/gameport/gameport.c b/drivers/input/gameport/gameport.c index 078e4ee..2880eaa 100644 --- a/drivers/input/gameport/gameport.c +++ b/drivers/input/gameport/gameport.c @@ -231,6 +231,7 @@ static void gameport_find_driver(struct gameport *gameport) enum gameport_event_type { GAMEPORT_REGISTER_PORT, GAMEPORT_REGISTER_DRIVER, + GAMEPORT_ATTACH_DRIVER, }; struct gameport_event { @@ -245,11 +246,12 @@ static LIST_HEAD(gameport_event_list); static DECLARE_WAIT_QUEUE_HEAD(gameport_wait); static struct task_struct *gameport_task; -static void gameport_queue_event(void *object, struct module *owner, - enum gameport_event_type event_type) +static int gameport_queue_event(void *object, struct module *owner, + enum gameport_event_type event_type) { unsigned long flags; struct gameport_event *event; + int retval = 0; spin_lock_irqsave(&gameport_event_lock, flags); @@ -268,24 +270,34 @@ static void gameport_queue_event(void *object, struct module *owner, } } - if ((event = kmalloc(sizeof(struct gameport_event), GFP_ATOMIC))) { - if (!try_module_get(owner)) { - printk(KERN_WARNING "gameport: Can't get module reference, dropping event %d\n", event_type); - kfree(event); - goto out; - } - - event->type = event_type; - event->object = object; - event->owner = owner; + event = kmalloc(sizeof(struct gameport_event), GFP_ATOMIC); + if (!event) { + printk(KERN_ERR + "gameport: Not enough memory to queue event %d\n", + event_type); + retval = -ENOMEM; + goto out; + } - list_add_tail(&event->node, &gameport_event_list); - wake_up(&gameport_wait); - } else { - printk(KERN_ERR "gameport: Not enough memory to queue event %d\n", event_type); + if (!try_module_get(owner)) { + printk(KERN_WARNING + "gameport: Can't get module reference, dropping event %d\n", + event_type); + kfree(event); + retval = -EINVAL; + goto out; } + + event->type = event_type; + event->object = object; + event->owner = owner; + + list_add_tail(&event->node, &gameport_event_list); + wake_up(&gameport_wait); + out: spin_unlock_irqrestore(&gameport_event_lock, flags); + return retval; } static void gameport_free_event(struct gameport_event *event) @@ -378,9 +390,10 @@ static void gameport_handle_event(void) } /* - * Remove all events that have been submitted for a given gameport port. + * Remove all events that have been submitted for a given object, + * be it a gameport port or a driver. */ -static void gameport_remove_pending_events(struct gameport *gameport) +static void gameport_remove_pending_events(void *object) { struct list_head *node, *next; struct gameport_event *event; @@ -390,7 +403,7 @@ static void gameport_remove_pending_events(struct gameport *gameport) list_for_each_safe(node, next, &gameport_event_list) { event = list_entry(node, struct gameport_event, node); - if (event->object == gameport) { + if (event->object == object) { list_del_init(node); gameport_free_event(event); } @@ -705,10 +718,40 @@ static void gameport_add_driver(struct gameport_driver *drv) drv->driver.name, error); } -void __gameport_register_driver(struct gameport_driver *drv, struct module *owner) +int __gameport_register_driver(struct gameport_driver *drv, struct module *owner, + const char *mod_name) { + int error; + drv->driver.bus = &gameport_bus; - gameport_queue_event(drv, owner, GAMEPORT_REGISTER_DRIVER); + drv->driver.owner = owner; + drv->driver.mod_name = mod_name; + + /* + * Temporarily disable automatic binding because probing + * takes long time and we are better off doing it in kgameportd + */ + drv->ignore = 1; + + error = driver_register(&drv->driver); + if (error) { + printk(KERN_ERR + "gameport: driver_register() failed for %s, error: %d\n", + drv->driver.name, error); + return error; + } + + /* + * Reset ignore flag and let kgameportd bind the driver to free ports + */ + drv->ignore = 0; + error = gameport_queue_event(drv, NULL, GAMEPORT_ATTACH_DRIVER); + if (error) { + driver_unregister(&drv->driver); + return error; + } + + return 0; } void gameport_unregister_driver(struct gameport_driver *drv) @@ -716,7 +759,9 @@ void gameport_unregister_driver(struct gameport_driver *drv) struct gameport *gameport; mutex_lock(&gameport_mutex); + drv->ignore = 1; /* so gameport_find_driver ignores it */ + gameport_remove_pending_events(drv); start_over: list_for_each_entry(gameport, &gameport_list, node) { @@ -729,6 +774,7 @@ start_over: } driver_unregister(&drv->driver); + mutex_unlock(&gameport_mutex); } diff --git a/include/linux/gameport.h b/include/linux/gameport.h index f64e29c..5126125 100644 --- a/include/linux/gameport.h +++ b/include/linux/gameport.h @@ -146,10 +146,11 @@ static inline void gameport_unpin_driver(struct gameport *gameport) mutex_unlock(&gameport->drv_mutex); } -void __gameport_register_driver(struct gameport_driver *drv, struct module *owner); -static inline void gameport_register_driver(struct gameport_driver *drv) +int __gameport_register_driver(struct gameport_driver *drv, + struct module *owner, const char *mod_name); +static inline int gameport_register_driver(struct gameport_driver *drv) { - __gameport_register_driver(drv, THIS_MODULE); + return __gameport_register_driver(drv, THIS_MODULE, KBUILD_MODNAME); } void gameport_unregister_driver(struct gameport_driver *drv); -- cgit v0.10.2 From 2547203d583cc267b98f518d5d93e3a0469d8f62 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 6 Jun 2008 01:33:37 -0400 Subject: Input: gameport - check return value of gameport_register_driver() Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/joystick/a3d.c b/drivers/input/joystick/a3d.c index 92498d4..6489f40 100644 --- a/drivers/input/joystick/a3d.c +++ b/drivers/input/joystick/a3d.c @@ -414,8 +414,7 @@ static struct gameport_driver a3d_drv = { static int __init a3d_init(void) { - gameport_register_driver(&a3d_drv); - return 0; + return gameport_register_driver(&a3d_drv); } static void __exit a3d_exit(void) diff --git a/drivers/input/joystick/adi.c b/drivers/input/joystick/adi.c index d1ca8a1..89c4c08 100644 --- a/drivers/input/joystick/adi.c +++ b/drivers/input/joystick/adi.c @@ -572,8 +572,7 @@ static struct gameport_driver adi_drv = { static int __init adi_init(void) { - gameport_register_driver(&adi_drv); - return 0; + return gameport_register_driver(&adi_drv); } static void __exit adi_exit(void) diff --git a/drivers/input/joystick/analog.c b/drivers/input/joystick/analog.c index 708c5ae..356b3a2 100644 --- a/drivers/input/joystick/analog.c +++ b/drivers/input/joystick/analog.c @@ -761,9 +761,7 @@ static struct gameport_driver analog_drv = { static int __init analog_init(void) { analog_parse_options(); - gameport_register_driver(&analog_drv); - - return 0; + return gameport_register_driver(&analog_drv); } static void __exit analog_exit(void) diff --git a/drivers/input/joystick/cobra.c b/drivers/input/joystick/cobra.c index 639b975..3497b87 100644 --- a/drivers/input/joystick/cobra.c +++ b/drivers/input/joystick/cobra.c @@ -263,8 +263,7 @@ static struct gameport_driver cobra_drv = { static int __init cobra_init(void) { - gameport_register_driver(&cobra_drv); - return 0; + return gameport_register_driver(&cobra_drv); } static void __exit cobra_exit(void) diff --git a/drivers/input/joystick/gf2k.c b/drivers/input/joystick/gf2k.c index cb6eef1..67c207f 100644 --- a/drivers/input/joystick/gf2k.c +++ b/drivers/input/joystick/gf2k.c @@ -375,8 +375,7 @@ static struct gameport_driver gf2k_drv = { static int __init gf2k_init(void) { - gameport_register_driver(&gf2k_drv); - return 0; + return gameport_register_driver(&gf2k_drv); } static void __exit gf2k_exit(void) diff --git a/drivers/input/joystick/grip.c b/drivers/input/joystick/grip.c index 684e07c..fc55899 100644 --- a/drivers/input/joystick/grip.c +++ b/drivers/input/joystick/grip.c @@ -426,8 +426,7 @@ static struct gameport_driver grip_drv = { static int __init grip_init(void) { - gameport_register_driver(&grip_drv); - return 0; + return gameport_register_driver(&grip_drv); } static void __exit grip_exit(void) diff --git a/drivers/input/joystick/grip_mp.c b/drivers/input/joystick/grip_mp.c index 8279481..2d47baf 100644 --- a/drivers/input/joystick/grip_mp.c +++ b/drivers/input/joystick/grip_mp.c @@ -689,8 +689,7 @@ static struct gameport_driver grip_drv = { static int __init grip_init(void) { - gameport_register_driver(&grip_drv); - return 0; + return gameport_register_driver(&grip_drv); } static void __exit grip_exit(void) diff --git a/drivers/input/joystick/guillemot.c b/drivers/input/joystick/guillemot.c index 25ec3fa..4058d4b 100644 --- a/drivers/input/joystick/guillemot.c +++ b/drivers/input/joystick/guillemot.c @@ -283,8 +283,7 @@ static struct gameport_driver guillemot_drv = { static int __init guillemot_init(void) { - gameport_register_driver(&guillemot_drv); - return 0; + return gameport_register_driver(&guillemot_drv); } static void __exit guillemot_exit(void) diff --git a/drivers/input/joystick/interact.c b/drivers/input/joystick/interact.c index 8c3290b..2478289 100644 --- a/drivers/input/joystick/interact.c +++ b/drivers/input/joystick/interact.c @@ -317,8 +317,7 @@ static struct gameport_driver interact_drv = { static int __init interact_init(void) { - gameport_register_driver(&interact_drv); - return 0; + return gameport_register_driver(&interact_drv); } static void __exit interact_exit(void) diff --git a/drivers/input/joystick/joydump.c b/drivers/input/joystick/joydump.c index 2a1b82c..cd894a0 100644 --- a/drivers/input/joystick/joydump.c +++ b/drivers/input/joystick/joydump.c @@ -161,8 +161,7 @@ static struct gameport_driver joydump_drv = { static int __init joydump_init(void) { - gameport_register_driver(&joydump_drv); - return 0; + return gameport_register_driver(&joydump_drv); } static void __exit joydump_exit(void) diff --git a/drivers/input/joystick/sidewinder.c b/drivers/input/joystick/sidewinder.c index 7b4865f..ca13a6b 100644 --- a/drivers/input/joystick/sidewinder.c +++ b/drivers/input/joystick/sidewinder.c @@ -818,8 +818,7 @@ static struct gameport_driver sw_drv = { static int __init sw_init(void) { - gameport_register_driver(&sw_drv); - return 0; + return gameport_register_driver(&sw_drv); } static void __exit sw_exit(void) diff --git a/drivers/input/joystick/tmdc.c b/drivers/input/joystick/tmdc.c index 60c37bc..d6c6098 100644 --- a/drivers/input/joystick/tmdc.c +++ b/drivers/input/joystick/tmdc.c @@ -438,8 +438,7 @@ static struct gameport_driver tmdc_drv = { static int __init tmdc_init(void) { - gameport_register_driver(&tmdc_drv); - return 0; + return gameport_register_driver(&tmdc_drv); } static void __exit tmdc_exit(void) -- cgit v0.10.2 From 8c4b3c29329eb7ffded3023e6d65bc415cb4e215 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 6 Jun 2008 01:33:51 -0400 Subject: Input: gameport - mark gameport_register_driver() __must_check Signed-off-by: Dmitry Torokhov diff --git a/include/linux/gameport.h b/include/linux/gameport.h index 5126125..0cd825f 100644 --- a/include/linux/gameport.h +++ b/include/linux/gameport.h @@ -148,7 +148,7 @@ static inline void gameport_unpin_driver(struct gameport *gameport) int __gameport_register_driver(struct gameport_driver *drv, struct module *owner, const char *mod_name); -static inline int gameport_register_driver(struct gameport_driver *drv) +static inline int __must_check gameport_register_driver(struct gameport_driver *drv) { return __gameport_register_driver(drv, THIS_MODULE, KBUILD_MODNAME); } -- cgit v0.10.2 From 1971b9d56fce9d8903e623b953c5e2fffe3a878e Mon Sep 17 00:00:00 2001 From: Ville Syrjala Date: Thu, 3 Jul 2008 10:45:37 -0400 Subject: Input: ati_remote2 - add loadable keymap support Support for loadable keymaps. The driver now supports individual keymaps for each of the five modes (AUX1-AUX4 and PC) of the remote. To achieve this the keymap scancode is interpreted as a combination of the mode and actual button scancode. The original keycode patches were done by Peter Stokes but I modified it quite a lot. Signed-off-by: Ville Syrjala Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/misc/ati_remote2.c b/drivers/input/misc/ati_remote2.c index a7fabaf..f4918b9 100644 --- a/drivers/input/misc/ati_remote2.c +++ b/drivers/input/misc/ati_remote2.c @@ -1,8 +1,8 @@ /* * ati_remote2 - ATI/Philips USB RF remote driver * - * Copyright (C) 2005 Ville Syrjala - * Copyright (C) 2007 Peter Stokes + * Copyright (C) 2005-2008 Ville Syrjala + * Copyright (C) 2007-2008 Peter Stokes * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 @@ -12,7 +12,7 @@ #include #define DRIVER_DESC "ATI/Philips USB RF remote driver" -#define DRIVER_VERSION "0.2" +#define DRIVER_VERSION "0.3" MODULE_DESCRIPTION(DRIVER_DESC); MODULE_VERSION(DRIVER_VERSION); @@ -27,7 +27,7 @@ MODULE_LICENSE("GPL"); * A remote's "channel" may be altered by pressing and holding the "PC" button for * approximately 3 seconds, after which the button will slowly flash the count of the * currently configured "channel", using the numeric keypad enter a number between 1 and - * 16 and then the "PC" button again, the button will slowly flash the count of the + * 16 and then press the "PC" button again, the button will slowly flash the count of the * newly configured "channel". */ @@ -45,9 +45,18 @@ static struct usb_device_id ati_remote2_id_table[] = { }; MODULE_DEVICE_TABLE(usb, ati_remote2_id_table); -static struct { - int hw_code; - int key_code; +enum { + ATI_REMOTE2_AUX1, + ATI_REMOTE2_AUX2, + ATI_REMOTE2_AUX3, + ATI_REMOTE2_AUX4, + ATI_REMOTE2_PC, + ATI_REMOTE2_MODES, +}; + +static const struct { + u8 hw_code; + u16 keycode; } ati_remote2_key_table[] = { { 0x00, KEY_0 }, { 0x01, KEY_1 }, @@ -73,6 +82,7 @@ static struct { { 0x37, KEY_RECORD }, { 0x38, KEY_DVD }, { 0x39, KEY_TV }, + { 0x3f, KEY_PROG1 }, /* AUX1-AUX4 and PC */ { 0x54, KEY_MENU }, { 0x58, KEY_UP }, { 0x59, KEY_DOWN }, @@ -91,15 +101,9 @@ static struct { { 0xa9, BTN_LEFT }, { 0xaa, BTN_RIGHT }, { 0xbe, KEY_QUESTION }, - { 0xd5, KEY_FRONT }, { 0xd0, KEY_EDIT }, + { 0xd5, KEY_FRONT }, { 0xf9, KEY_INFO }, - { (0x00 << 8) | 0x3f, KEY_PROG1 }, - { (0x01 << 8) | 0x3f, KEY_PROG2 }, - { (0x02 << 8) | 0x3f, KEY_PROG3 }, - { (0x03 << 8) | 0x3f, KEY_PROG4 }, - { (0x04 << 8) | 0x3f, KEY_PC }, - { 0, KEY_RESERVED } }; struct ati_remote2 { @@ -117,6 +121,9 @@ struct ati_remote2 { char name[64]; char phys[64]; + + /* Each mode (AUX1-AUX4 and PC) can have an independent keymap. */ + u16 keycode[ATI_REMOTE2_MODES][ARRAY_SIZE(ati_remote2_key_table)]; }; static int ati_remote2_probe(struct usb_interface *interface, const struct usb_device_id *id); @@ -172,7 +179,7 @@ static void ati_remote2_input_mouse(struct ati_remote2 *ar2) mode = data[0] & 0x0F; - if (mode > 4) { + if (mode > ATI_REMOTE2_PC) { dev_err(&ar2->intf[0]->dev, "Unknown mode byte (%02x %02x %02x %02x)\n", data[3], data[2], data[1], data[0]); @@ -191,7 +198,7 @@ static int ati_remote2_lookup(unsigned int hw_code) { int i; - for (i = 0; ati_remote2_key_table[i].key_code != KEY_RESERVED; i++) + for (i = 0; i < ARRAY_SIZE(ati_remote2_key_table); i++) if (ati_remote2_key_table[i].hw_code == hw_code) return i; @@ -211,7 +218,7 @@ static void ati_remote2_input_key(struct ati_remote2 *ar2) mode = data[0] & 0x0F; - if (mode > 4) { + if (mode > ATI_REMOTE2_PC) { dev_err(&ar2->intf[1]->dev, "Unknown mode byte (%02x %02x %02x %02x)\n", data[3], data[2], data[1], data[0]); @@ -219,10 +226,6 @@ static void ati_remote2_input_key(struct ati_remote2 *ar2) } hw_code = data[2]; - /* - * Mode keys (AUX1-AUX4, PC) all generate the same code byte. - * Use the mode byte to figure out which one was pressed. - */ if (hw_code == 0x3f) { /* * For some incomprehensible reason the mouse pad generates @@ -236,8 +239,6 @@ static void ati_remote2_input_key(struct ati_remote2 *ar2) if (data[1] == 0) ar2->mode = mode; - - hw_code |= mode << 8; } if (!((1 << mode) & mode_mask)) @@ -260,8 +261,8 @@ static void ati_remote2_input_key(struct ati_remote2 *ar2) case 2: /* repeat */ /* No repeat for mouse buttons. */ - if (ati_remote2_key_table[index].key_code == BTN_LEFT || - ati_remote2_key_table[index].key_code == BTN_RIGHT) + if (ar2->keycode[mode][index] == BTN_LEFT || + ar2->keycode[mode][index] == BTN_RIGHT) return; if (!time_after_eq(jiffies, ar2->jiffies)) @@ -276,7 +277,7 @@ static void ati_remote2_input_key(struct ati_remote2 *ar2) return; } - input_event(idev, EV_KEY, ati_remote2_key_table[index].key_code, data[1]); + input_event(idev, EV_KEY, ar2->keycode[mode][index], data[1]); input_sync(idev); } @@ -334,10 +335,60 @@ static void ati_remote2_complete_key(struct urb *urb) "%s(): usb_submit_urb() = %d\n", __func__, r); } +static int ati_remote2_getkeycode(struct input_dev *idev, + int scancode, int *keycode) +{ + struct ati_remote2 *ar2 = input_get_drvdata(idev); + int index, mode; + + mode = scancode >> 8; + if (mode > ATI_REMOTE2_PC || !((1 << mode) & mode_mask)) + return -EINVAL; + + index = ati_remote2_lookup(scancode & 0xFF); + if (index < 0) + return -EINVAL; + + *keycode = ar2->keycode[mode][index]; + return 0; +} + +static int ati_remote2_setkeycode(struct input_dev *idev, int scancode, int keycode) +{ + struct ati_remote2 *ar2 = input_get_drvdata(idev); + int index, mode, old_keycode; + + mode = scancode >> 8; + if (mode > ATI_REMOTE2_PC || !((1 << mode) & mode_mask)) + return -EINVAL; + + index = ati_remote2_lookup(scancode & 0xFF); + if (index < 0) + return -EINVAL; + + if (keycode < KEY_RESERVED || keycode > KEY_MAX) + return -EINVAL; + + old_keycode = ar2->keycode[mode][index]; + ar2->keycode[mode][index] = keycode; + set_bit(keycode, idev->keybit); + + for (mode = 0; mode < ATI_REMOTE2_MODES; mode++) { + for (index = 0; index < ARRAY_SIZE(ati_remote2_key_table); index++) { + if (ar2->keycode[mode][index] == old_keycode) + return 0; + } + } + + clear_bit(old_keycode, idev->keybit); + + return 0; +} + static int ati_remote2_input_init(struct ati_remote2 *ar2) { struct input_dev *idev; - int i, retval; + int index, mode, retval; idev = input_allocate_device(); if (!idev) @@ -350,8 +401,26 @@ static int ati_remote2_input_init(struct ati_remote2 *ar2) idev->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_LEFT) | BIT_MASK(BTN_RIGHT); idev->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y); - for (i = 0; ati_remote2_key_table[i].key_code != KEY_RESERVED; i++) - set_bit(ati_remote2_key_table[i].key_code, idev->keybit); + + for (mode = 0; mode < ATI_REMOTE2_MODES; mode++) { + for (index = 0; index < ARRAY_SIZE(ati_remote2_key_table); index++) { + ar2->keycode[mode][index] = ati_remote2_key_table[index].keycode; + set_bit(ar2->keycode[mode][index], idev->keybit); + } + } + + /* AUX1-AUX4 and PC generate the same scancode. */ + index = ati_remote2_lookup(0x3f); + ar2->keycode[ATI_REMOTE2_AUX1][index] = KEY_PROG1; + ar2->keycode[ATI_REMOTE2_AUX2][index] = KEY_PROG2; + ar2->keycode[ATI_REMOTE2_AUX3][index] = KEY_PROG3; + ar2->keycode[ATI_REMOTE2_AUX4][index] = KEY_PROG4; + ar2->keycode[ATI_REMOTE2_PC][index] = KEY_PC; + set_bit(KEY_PROG1, idev->keybit); + set_bit(KEY_PROG2, idev->keybit); + set_bit(KEY_PROG3, idev->keybit); + set_bit(KEY_PROG4, idev->keybit); + set_bit(KEY_PC, idev->keybit); idev->rep[REP_DELAY] = 250; idev->rep[REP_PERIOD] = 33; @@ -359,6 +428,9 @@ static int ati_remote2_input_init(struct ati_remote2 *ar2) idev->open = ati_remote2_open; idev->close = ati_remote2_close; + idev->getkeycode = ati_remote2_getkeycode; + idev->setkeycode = ati_remote2_setkeycode; + idev->name = ar2->name; idev->phys = ar2->phys; -- cgit v0.10.2 From d6505ab9cd5672f99adeba86696499c2651a6e73 Mon Sep 17 00:00:00 2001 From: Ville Syrjala Date: Thu, 3 Jul 2008 10:45:37 -0400 Subject: Input: ati_remote2 - add autosuspend support Signed-off-by: Ville Syrjala Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/misc/ati_remote2.c b/drivers/input/misc/ati_remote2.c index f4918b9..3c9988d 100644 --- a/drivers/input/misc/ati_remote2.c +++ b/drivers/input/misc/ati_remote2.c @@ -45,6 +45,13 @@ static struct usb_device_id ati_remote2_id_table[] = { }; MODULE_DEVICE_TABLE(usb, ati_remote2_id_table); +static DEFINE_MUTEX(ati_remote2_mutex); + +enum { + ATI_REMOTE2_OPENED = 0x1, + ATI_REMOTE2_SUSPENDED = 0x2, +}; + enum { ATI_REMOTE2_AUX1, ATI_REMOTE2_AUX2, @@ -124,46 +131,103 @@ struct ati_remote2 { /* Each mode (AUX1-AUX4 and PC) can have an independent keymap. */ u16 keycode[ATI_REMOTE2_MODES][ARRAY_SIZE(ati_remote2_key_table)]; + + unsigned int flags; }; static int ati_remote2_probe(struct usb_interface *interface, const struct usb_device_id *id); static void ati_remote2_disconnect(struct usb_interface *interface); +static int ati_remote2_suspend(struct usb_interface *interface, pm_message_t message); +static int ati_remote2_resume(struct usb_interface *interface); static struct usb_driver ati_remote2_driver = { .name = "ati_remote2", .probe = ati_remote2_probe, .disconnect = ati_remote2_disconnect, .id_table = ati_remote2_id_table, + .suspend = ati_remote2_suspend, + .resume = ati_remote2_resume, + .supports_autosuspend = 1, }; -static int ati_remote2_open(struct input_dev *idev) +static int ati_remote2_submit_urbs(struct ati_remote2 *ar2) { - struct ati_remote2 *ar2 = input_get_drvdata(idev); int r; r = usb_submit_urb(ar2->urb[0], GFP_KERNEL); if (r) { dev_err(&ar2->intf[0]->dev, - "%s: usb_submit_urb() = %d\n", __func__, r); + "%s(): usb_submit_urb() = %d\n", __func__, r); return r; } r = usb_submit_urb(ar2->urb[1], GFP_KERNEL); if (r) { usb_kill_urb(ar2->urb[0]); dev_err(&ar2->intf[1]->dev, - "%s: usb_submit_urb() = %d\n", __func__, r); + "%s(): usb_submit_urb() = %d\n", __func__, r); return r; } return 0; } +static void ati_remote2_kill_urbs(struct ati_remote2 *ar2) +{ + usb_kill_urb(ar2->urb[1]); + usb_kill_urb(ar2->urb[0]); +} + +static int ati_remote2_open(struct input_dev *idev) +{ + struct ati_remote2 *ar2 = input_get_drvdata(idev); + int r; + + dev_dbg(&ar2->intf[0]->dev, "%s()\n", __func__); + + r = usb_autopm_get_interface(ar2->intf[0]); + if (r) { + dev_err(&ar2->intf[0]->dev, + "%s(): usb_autopm_get_interface() = %d\n", __func__, r); + goto fail1; + } + + mutex_lock(&ati_remote2_mutex); + + if (!(ar2->flags & ATI_REMOTE2_SUSPENDED)) { + r = ati_remote2_submit_urbs(ar2); + if (r) + goto fail2; + } + + ar2->flags |= ATI_REMOTE2_OPENED; + + mutex_unlock(&ati_remote2_mutex); + + usb_autopm_put_interface(ar2->intf[0]); + + return 0; + + fail2: + mutex_unlock(&ati_remote2_mutex); + usb_autopm_put_interface(ar2->intf[0]); + fail1: + return r; +} + static void ati_remote2_close(struct input_dev *idev) { struct ati_remote2 *ar2 = input_get_drvdata(idev); - usb_kill_urb(ar2->urb[0]); - usb_kill_urb(ar2->urb[1]); + dev_dbg(&ar2->intf[0]->dev, "%s()\n", __func__); + + mutex_lock(&ati_remote2_mutex); + + if (!(ar2->flags & ATI_REMOTE2_SUSPENDED)) + ati_remote2_kill_urbs(ar2); + + ar2->flags &= ~ATI_REMOTE2_OPENED; + + mutex_unlock(&ati_remote2_mutex); } static void ati_remote2_input_mouse(struct ati_remote2 *ar2) @@ -288,6 +352,7 @@ static void ati_remote2_complete_mouse(struct urb *urb) switch (urb->status) { case 0: + usb_mark_last_busy(ar2->udev); ati_remote2_input_mouse(ar2); break; case -ENOENT: @@ -298,6 +363,7 @@ static void ati_remote2_complete_mouse(struct urb *urb) "%s(): urb status = %d\n", __func__, urb->status); return; default: + usb_mark_last_busy(ar2->udev); dev_err(&ar2->intf[0]->dev, "%s(): urb status = %d\n", __func__, urb->status); } @@ -315,6 +381,7 @@ static void ati_remote2_complete_key(struct urb *urb) switch (urb->status) { case 0: + usb_mark_last_busy(ar2->udev); ati_remote2_input_key(ar2); break; case -ENOENT: @@ -325,6 +392,7 @@ static void ati_remote2_complete_key(struct urb *urb) "%s(): urb status = %d\n", __func__, urb->status); return; default: + usb_mark_last_busy(ar2->udev); dev_err(&ar2->intf[1]->dev, "%s(): urb status = %d\n", __func__, urb->status); } @@ -562,6 +630,8 @@ static int ati_remote2_probe(struct usb_interface *interface, const struct usb_d usb_set_intfdata(interface, ar2); + interface->needs_remote_wakeup = 1; + return 0; fail2: @@ -594,6 +664,57 @@ static void ati_remote2_disconnect(struct usb_interface *interface) kfree(ar2); } +static int ati_remote2_suspend(struct usb_interface *interface, + pm_message_t message) +{ + struct ati_remote2 *ar2; + struct usb_host_interface *alt = interface->cur_altsetting; + + if (alt->desc.bInterfaceNumber) + return 0; + + ar2 = usb_get_intfdata(interface); + + dev_dbg(&ar2->intf[0]->dev, "%s()\n", __func__); + + mutex_lock(&ati_remote2_mutex); + + if (ar2->flags & ATI_REMOTE2_OPENED) + ati_remote2_kill_urbs(ar2); + + ar2->flags |= ATI_REMOTE2_SUSPENDED; + + mutex_unlock(&ati_remote2_mutex); + + return 0; +} + +static int ati_remote2_resume(struct usb_interface *interface) +{ + struct ati_remote2 *ar2; + struct usb_host_interface *alt = interface->cur_altsetting; + int r = 0; + + if (alt->desc.bInterfaceNumber) + return 0; + + ar2 = usb_get_intfdata(interface); + + dev_dbg(&ar2->intf[0]->dev, "%s()\n", __func__); + + mutex_lock(&ati_remote2_mutex); + + if (ar2->flags & ATI_REMOTE2_OPENED) + r = ati_remote2_submit_urbs(ar2); + + if (!r) + ar2->flags &= ~ATI_REMOTE2_SUSPENDED; + + mutex_unlock(&ati_remote2_mutex); + + return r; +} + static int __init ati_remote2_init(void) { int r; -- cgit v0.10.2 From 03bac96fae0efdb25e2059e5accbe4f3ee6328dd Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 23 Jun 2008 10:47:34 -0400 Subject: Input: expand keycode space Expand the number of potential key codes from 512 to 768 since people are coming up with more and more keys. Signed-off-by: Dmitry Torokhov diff --git a/include/linux/input.h b/include/linux/input.h index a5802c9..7fae1de 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -579,7 +579,7 @@ struct input_absinfo { /* We avoid low common keys in module aliases so they don't get huge. */ #define KEY_MIN_INTERESTING KEY_MUTE -#define KEY_MAX 0x1ff +#define KEY_MAX 0x2ff #define KEY_CNT (KEY_MAX+1) /* diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index c4db582..0dddfa4 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -274,7 +274,7 @@ struct pcmcia_device_id { /* Input */ #define INPUT_DEVICE_ID_EV_MAX 0x1f #define INPUT_DEVICE_ID_KEY_MIN_INTERESTING 0x71 -#define INPUT_DEVICE_ID_KEY_MAX 0x1ff +#define INPUT_DEVICE_ID_KEY_MAX 0x2ff #define INPUT_DEVICE_ID_REL_MAX 0x0f #define INPUT_DEVICE_ID_ABS_MAX 0x3f #define INPUT_DEVICE_ID_MSC_MAX 0x07 -- cgit v0.10.2 From 5a599a15182ed48e5bf54111feb3b21e425e194d Mon Sep 17 00:00:00 2001 From: Aristeu Rozanski Date: Mon, 23 Jun 2008 10:47:53 -0400 Subject: Input: add keycodes for remote controls/phone keypads The new keys are separate from normal numeric keys and standard numeric keypads. The userspace should not attempt to apply modifiers like shift and NumLock to these so tey work properly regardless of the language mapping used. Signed-off-by: Dmitry Torokhov diff --git a/include/linux/input.h b/include/linux/input.h index 7fae1de..b86fb55 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -577,6 +577,19 @@ struct input_absinfo { #define KEY_BRL_DOT9 0x1f9 #define KEY_BRL_DOT10 0x1fa +#define KEY_NUMERIC_0 0x200 /* used by phones, remote controls, */ +#define KEY_NUMERIC_1 0x201 /* and other keypads */ +#define KEY_NUMERIC_2 0x202 +#define KEY_NUMERIC_3 0x203 +#define KEY_NUMERIC_4 0x204 +#define KEY_NUMERIC_5 0x205 +#define KEY_NUMERIC_6 0x206 +#define KEY_NUMERIC_7 0x207 +#define KEY_NUMERIC_8 0x208 +#define KEY_NUMERIC_9 0x209 +#define KEY_NUMERIC_STAR 0x20a +#define KEY_NUMERIC_POUND 0x20b + /* We avoid low common keys in module aliases so they don't get huge. */ #define KEY_MIN_INTERESTING KEY_MUTE #define KEY_MAX 0x2ff -- cgit v0.10.2 From a648bf4632628c787abb0514277f2a231fca39ca Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 29 Jul 2008 10:29:18 -0700 Subject: x86, xsave: xsave cpuid feature bits Add xsave CPU feature bits. Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/feature_names.c b/arch/x86/kernel/cpu/feature_names.c index 0bf4d37..7415472 100644 --- a/arch/x86/kernel/cpu/feature_names.c +++ b/arch/x86/kernel/cpu/feature_names.c @@ -46,7 +46,7 @@ const char * const x86_cap_flags[NCAPINTS*32] = { "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, NULL, NULL, "dca", "sse4_1", "sse4_2", "x2apic", NULL, "popcnt", - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, "xsave", NULL, NULL, NULL, NULL, NULL, /* VIA/Cyrix/Centaur-defined */ NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en", diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h index 42afe9c..c76b3f6 100644 --- a/include/asm-x86/cpufeature.h +++ b/include/asm-x86/cpufeature.h @@ -92,6 +92,7 @@ #define X86_FEATURE_XTPR (4*32+14) /* Send Task Priority Messages */ #define X86_FEATURE_DCA (4*32+18) /* Direct Cache Access */ #define X86_FEATURE_X2APIC (4*32+21) /* x2APIC */ +#define X86_FEATURE_XSAVE (4*32+26) /* XSAVE */ /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ #define X86_FEATURE_XSTORE (5*32+ 2) /* on-CPU RNG present (xstore insn) */ @@ -191,6 +192,7 @@ extern const char * const x86_power_flags[32]; #define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON) #define cpu_has_pat boot_cpu_has(X86_FEATURE_PAT) #define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC) +#define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) #if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64) # define cpu_has_invlpg 1 -- cgit v0.10.2 From dc1e35c6e95e8923cf1d3510438b63c600fee1e2 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 29 Jul 2008 10:29:19 -0700 Subject: x86, xsave: enable xsave/xrstor on cpus with xsave support Enables xsave/xrstor by turning on cr4.osxsave on cpu's which have the xsave support. For now, features that OS supports/enabled are FP and SSE. Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index a07ec14..d6ea91a 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -38,7 +38,7 @@ obj-y += tsc.o io_delay.o rtc.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o obj-y += process.o -obj-y += i387.o +obj-y += i387.o xsave.o obj-y += ptrace.o obj-y += ds.o obj-$(CONFIG_X86_32) += tls.o diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 80ab20d..fabbcb7 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -712,6 +712,14 @@ void __cpuinit cpu_init(void) current_thread_info()->status = 0; clear_used_math(); mxcsr_feature_mask_init(); + + /* + * Boot processor to setup the FP and extended state context info. + */ + if (!smp_processor_id()) + init_thread_xstate(); + + xsave_init(); } #ifdef CONFIG_HOTPLUG_CPU diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index eb9ddd8..e22a9a9 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -61,6 +61,11 @@ void __init init_thread_xstate(void) return; } + if (cpu_has_xsave) { + xsave_cntxt_init(); + return; + } + if (cpu_has_fxsr) xstate_size = sizeof(struct i387_fxsave_struct); #ifdef CONFIG_X86_32 @@ -83,6 +88,13 @@ void __cpuinit fpu_init(void) write_cr0(oldcr0 & ~(X86_CR0_TS|X86_CR0_EM)); /* clear TS and EM */ + /* + * Boot processor to setup the FP and extended state context info. + */ + if (!smp_processor_id()) + init_thread_xstate(); + xsave_init(); + mxcsr_feature_mask_init(); /* clean state in init */ current_thread_info()->status = 0; diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 03df8e4..da5a596 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -1228,7 +1228,6 @@ void __init trap_init(void) set_bit(SYSCALL_VECTOR, used_vectors); - init_thread_xstate(); /* * Should be a barrier for any external CPU state: */ diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index 513caac..3580a79 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -1173,10 +1173,6 @@ void __init trap_init(void) set_system_gate(IA32_SYSCALL_VECTOR, ia32_syscall); #endif /* - * initialize the per thread extended state: - */ - init_thread_xstate(); - /* * Should be a barrier for any external CPU state: */ cpu_init(); diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c new file mode 100644 index 0000000..c68b7c4 --- /dev/null +++ b/arch/x86/kernel/xsave.c @@ -0,0 +1,87 @@ +/* + * xsave/xrstor support. + * + * Author: Suresh Siddha + */ +#include +#include +#include + +/* + * Supported feature mask by the CPU and the kernel. + */ +unsigned int pcntxt_hmask, pcntxt_lmask; + +/* + * Represents init state for the supported extended state. + */ +struct xsave_struct *init_xstate_buf; + +/* + * Enable the extended processor state save/restore feature + */ +void __cpuinit xsave_init(void) +{ + if (!cpu_has_xsave) + return; + + set_in_cr4(X86_CR4_OSXSAVE); + + /* + * Enable all the features that the HW is capable of + * and the Linux kernel is aware of. + * + * xsetbv(); + */ + asm volatile(".byte 0x0f,0x01,0xd1" : : "c" (0), + "a" (pcntxt_lmask), "d" (pcntxt_hmask)); +} + +/* + * setup the xstate image representing the init state + */ +void setup_xstate_init(void) +{ + init_xstate_buf = alloc_bootmem(xstate_size); + init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT; +} + +/* + * Enable and initialize the xsave feature. + */ +void __init xsave_cntxt_init(void) +{ + unsigned int eax, ebx, ecx, edx; + + cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx); + + pcntxt_lmask = eax; + pcntxt_hmask = edx; + + if ((pcntxt_lmask & XSTATE_FPSSE) != XSTATE_FPSSE) { + printk(KERN_ERR "FP/SSE not shown under xsave features %x\n", + pcntxt_lmask); + BUG(); + } + + /* + * for now OS knows only about FP/SSE + */ + pcntxt_lmask = pcntxt_lmask & XCNTXT_LMASK; + pcntxt_hmask = pcntxt_hmask & XCNTXT_HMASK; + + xsave_init(); + + /* + * Recompute the context size for enabled features + */ + cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx); + + xstate_size = ebx; + + setup_xstate_init(); + + printk(KERN_INFO "xsave/xrstor: enabled xstate_bv 0x%Lx, " + "cntxt size 0x%x\n", + (pcntxt_lmask | ((u64) pcntxt_hmask << 32)), xstate_size); +} diff --git a/include/asm-x86/i387.h b/include/asm-x86/i387.h index 3958de6..6a66478 100644 --- a/include/asm-x86/i387.h +++ b/include/asm-x86/i387.h @@ -18,6 +18,7 @@ #include #include #include +#include extern void fpu_init(void); extern void mxcsr_feature_mask_init(void); diff --git a/include/asm-x86/processor-flags.h b/include/asm-x86/processor-flags.h index 5dd7977..dc5f071 100644 --- a/include/asm-x86/processor-flags.h +++ b/include/asm-x86/processor-flags.h @@ -59,6 +59,7 @@ #define X86_CR4_OSFXSR 0x00000200 /* enable fast FPU save and restore */ #define X86_CR4_OSXMMEXCPT 0x00000400 /* enable unmasked SSE exceptions */ #define X86_CR4_VMXE 0x00002000 /* enable VMX virtualization */ +#define X86_CR4_OSXSAVE 0x00040000 /* enable xsave and xrestore */ /* * x86-64 Task Priority Register, CR8 diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h index d60b4d8..d7c0221 100644 --- a/include/asm-x86/processor.h +++ b/include/asm-x86/processor.h @@ -346,6 +346,18 @@ struct i387_soft_struct { u32 entry_eip; }; +struct xsave_hdr_struct { + u64 xstate_bv; + u64 reserved1[2]; + u64 reserved2[5]; +} __attribute__((packed)); + +struct xsave_struct { + struct i387_fxsave_struct i387; + struct xsave_hdr_struct xsave_hdr; + /* new processor state extensions will go here */ +} __attribute__ ((packed, aligned (64))); + union thread_xstate { struct i387_fsave_struct fsave; struct i387_fxsave_struct fxsave; diff --git a/include/asm-x86/xsave.h b/include/asm-x86/xsave.h new file mode 100644 index 0000000..6d70e62 --- /dev/null +++ b/include/asm-x86/xsave.h @@ -0,0 +1,26 @@ +#ifndef __ASM_X86_XSAVE_H +#define __ASM_X86_XSAVE_H + +#include +#include + +#define XSTATE_FP 0x1 +#define XSTATE_SSE 0x2 + +#define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE) + +#define FXSAVE_SIZE 512 + +/* + * These are the features that the OS can handle currently. + */ +#define XCNTXT_LMASK (XSTATE_FP | XSTATE_SSE) +#define XCNTXT_HMASK 0x0 + +extern unsigned int xstate_size, pcntxt_hmask, pcntxt_lmask; +extern struct xsave_struct *init_xstate_buf; + +extern void xsave_cntxt_init(void); +extern void xsave_init(void); + +#endif -- cgit v0.10.2 From b359e8a434cc3d09847010fc4aeccf48d69740e4 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 29 Jul 2008 10:29:20 -0700 Subject: x86, xsave: context switch support using xsave/xrstor Uses xsave/xrstor (instead of traditional fxsave/fxrstor) in context switch when available. Introduces TS_XSAVE flag, which determine the need to use xsave/xrstor instructions during context switch instead of the legacy fxsave/fxrstor instructions. Thread-synchronous status word is already in L1 cache during this code patch and thus minimizes the performance penality compared to (cpu_has_xsave) checks. Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index fabbcb7..6c2b9e7 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -709,7 +709,10 @@ void __cpuinit cpu_init(void) /* * Force FPU initialization: */ - current_thread_info()->status = 0; + if (cpu_has_xsave) + current_thread_info()->status = TS_XSAVE; + else + current_thread_info()->status = 0; clear_used_math(); mxcsr_feature_mask_init(); diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index e22a9a9..b778e17 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -97,7 +97,10 @@ void __cpuinit fpu_init(void) mxcsr_feature_mask_init(); /* clean state in init */ - current_thread_info()->status = 0; + if (cpu_has_xsave) + current_thread_info()->status = TS_XSAVE; + else + current_thread_info()->status = 0; clear_used_math(); } #endif /* CONFIG_X86_64 */ diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index 3580a79..38eb761 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -1134,7 +1134,7 @@ asmlinkage void math_state_restore(void) /* * Paranoid restore. send a SIGSEGV if we fail to restore the state. */ - if (unlikely(restore_fpu_checking(&me->thread.xstate->fxsave))) { + if (unlikely(restore_fpu_checking(me))) { stts(); force_sig(SIGSEGV, me); return; diff --git a/include/asm-x86/i387.h b/include/asm-x86/i387.h index 6a66478..a6d256f4 100644 --- a/include/asm-x86/i387.h +++ b/include/asm-x86/i387.h @@ -36,6 +36,8 @@ extern int save_i387_ia32(struct _fpstate_ia32 __user *buf); extern int restore_i387_ia32(struct _fpstate_ia32 __user *buf); #endif +#define X87_FSW_ES (1 << 7) /* Exception Summary */ + #ifdef CONFIG_X86_64 /* Ignore delayed exceptions from user space */ @@ -46,7 +48,7 @@ static inline void tolerant_fwait(void) _ASM_EXTABLE(1b, 2b)); } -static inline int restore_fpu_checking(struct i387_fxsave_struct *fx) +static inline int fxrstor_checking(struct i387_fxsave_struct *fx) { int err; @@ -66,15 +68,31 @@ static inline int restore_fpu_checking(struct i387_fxsave_struct *fx) return err; } -#define X87_FSW_ES (1 << 7) /* Exception Summary */ +static inline int restore_fpu_checking(struct task_struct *tsk) +{ + if (task_thread_info(tsk)->status & TS_XSAVE) + return xrstor_checking(&tsk->thread.xstate->xsave); + else + return fxrstor_checking(&tsk->thread.xstate->fxsave); +} /* AMD CPUs don't save/restore FDP/FIP/FOP unless an exception is pending. Clear the x87 state here by setting it to fixed values. The kernel data segment can be sometimes 0 and sometimes new user value. Both should be ok. Use the PDA as safe address because it should be already in L1. */ -static inline void clear_fpu_state(struct i387_fxsave_struct *fx) +static inline void clear_fpu_state(struct task_struct *tsk) { + struct xsave_struct *xstate = &tsk->thread.xstate->xsave; + struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave; + + /* + * xsave header may indicate the init state of the FP. + */ + if ((task_thread_info(tsk)->status & TS_XSAVE) && + !(xstate->xsave_hdr.xstate_bv & XSTATE_FP)) + return; + if (unlikely(fx->swd & X87_FSW_ES)) asm volatile("fnclex"); alternative_input(ASM_NOP8 ASM_NOP2, @@ -107,7 +125,7 @@ static inline int save_i387_checking(struct i387_fxsave_struct __user *fx) return err; } -static inline void __save_init_fpu(struct task_struct *tsk) +static inline void fxsave(struct task_struct *tsk) { /* Using "rex64; fxsave %0" is broken because, if the memory operand uses any extended registers for addressing, a second REX prefix @@ -132,7 +150,16 @@ static inline void __save_init_fpu(struct task_struct *tsk) : "=m" (tsk->thread.xstate->fxsave) : "cdaSDb" (&tsk->thread.xstate->fxsave)); #endif - clear_fpu_state(&tsk->thread.xstate->fxsave); +} + +static inline void __save_init_fpu(struct task_struct *tsk) +{ + if (task_thread_info(tsk)->status & TS_XSAVE) + xsave(tsk); + else + fxsave(tsk); + + clear_fpu_state(tsk); task_thread_info(tsk)->status &= ~TS_USEDFPU; } @@ -147,6 +174,10 @@ static inline void tolerant_fwait(void) static inline void restore_fpu(struct task_struct *tsk) { + if (task_thread_info(tsk)->status & TS_XSAVE) { + xrstor_checking(&tsk->thread.xstate->xsave); + return; + } /* * The "nop" is needed to make the instructions the same * length. @@ -172,6 +203,27 @@ static inline void restore_fpu(struct task_struct *tsk) */ static inline void __save_init_fpu(struct task_struct *tsk) { + if (task_thread_info(tsk)->status & TS_XSAVE) { + struct xsave_struct *xstate = &tsk->thread.xstate->xsave; + struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave; + + xsave(tsk); + + /* + * xsave header may indicate the init state of the FP. + */ + if (!(xstate->xsave_hdr.xstate_bv & XSTATE_FP)) + goto end; + + if (unlikely(fx->swd & X87_FSW_ES)) + asm volatile("fnclex"); + + /* + * we can do a simple return here or be paranoid :) + */ + goto clear_state; + } + /* Use more nops than strictly needed in case the compiler varies code */ alternative_input( @@ -181,6 +233,7 @@ static inline void __save_init_fpu(struct task_struct *tsk) X86_FEATURE_FXSR, [fx] "m" (tsk->thread.xstate->fxsave), [fsw] "m" (tsk->thread.xstate->fxsave.swd) : "memory"); +clear_state: /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception is pending. Clear the x87 state here by setting it to fixed values. safe_address is a random variable that should be in L1 */ @@ -190,6 +243,7 @@ static inline void __save_init_fpu(struct task_struct *tsk) "fildl %[addr]", /* set F?P to defined value */ X86_FEATURE_FXSAVE_LEAK, [addr] "m" (safe_address)); +end: task_thread_info(tsk)->status &= ~TS_USEDFPU; } diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h index d7c0221..77b7af6 100644 --- a/include/asm-x86/processor.h +++ b/include/asm-x86/processor.h @@ -362,6 +362,7 @@ union thread_xstate { struct i387_fsave_struct fsave; struct i387_fxsave_struct fxsave; struct i387_soft_struct soft; + struct xsave_struct xsave; }; #ifdef CONFIG_X86_64 diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h index e64be88..30586f2 100644 --- a/include/asm-x86/thread_info.h +++ b/include/asm-x86/thread_info.h @@ -239,6 +239,7 @@ static inline struct thread_info *stack_thread_info(void) #define TS_POLLING 0x0004 /* true if in idle loop and not sleeping */ #define TS_RESTORE_SIGMASK 0x0008 /* restore signal mask in do_signal() */ +#define TS_XSAVE 0x0010 /* Use xsave/xrstor */ #define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING) diff --git a/include/asm-x86/xsave.h b/include/asm-x86/xsave.h index 6d70e62..e835a91 100644 --- a/include/asm-x86/xsave.h +++ b/include/asm-x86/xsave.h @@ -17,10 +17,43 @@ #define XCNTXT_LMASK (XSTATE_FP | XSTATE_SSE) #define XCNTXT_HMASK 0x0 +#ifdef CONFIG_X86_64 +#define REX_PREFIX "0x48, " +#else +#define REX_PREFIX +#endif + extern unsigned int xstate_size, pcntxt_hmask, pcntxt_lmask; extern struct xsave_struct *init_xstate_buf; extern void xsave_cntxt_init(void); extern void xsave_init(void); - +extern int init_fpu(struct task_struct *child); + +static inline int xrstor_checking(struct xsave_struct *fx) +{ + int err; + + asm volatile("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n\t" + "2:\n" + ".section .fixup,\"ax\"\n" + "3: movl $-1,%[err]\n" + " jmp 2b\n" + ".previous\n" + _ASM_EXTABLE(1b, 3b) + : [err] "=r" (err) + : "D" (fx), "m" (*fx), "a" (-1), "d" (-1), "0" (0) + : "memory"); + + return err; +} + +static inline void xsave(struct task_struct *tsk) +{ + /* This, however, we can work around by forcing the compiler to select + an addressing mode that doesn't require extended registers. */ + __asm__ __volatile__(".byte " REX_PREFIX "0x0f,0xae,0x27" + : : "D" (&(tsk->thread.xstate->xsave)), + "a" (-1), "d"(-1) : "memory"); +} #endif -- cgit v0.10.2 From 3c1c7f101426cb2ecc79d817a8a65928965fc860 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 29 Jul 2008 10:29:21 -0700 Subject: x86, xsave: dynamically allocate sigframes fpstate instead of static allocation dynamically allocate fpstate on the stack, instead of static allocation in the current sigframe layout on the user stack. This will allow the fpstate structure to grow in the future, which includes extended state information supporting xsave/xrstor. signal handlers will be able to access the fpstate pointer from the sigcontext structure asusual, with no change. For the non RT sigframe's (which are supported only for 32bit apps), current static fpstate layout in the sigframe will be unused(so that we don't change the extramask[] offset in the sigframe and thus prevent breaking app's which modify extramask[]). Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 20af4c7..a05bf0f 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -179,9 +179,10 @@ struct sigframe u32 pretcode; int sig; struct sigcontext_ia32 sc; - struct _fpstate_ia32 fpstate; + struct _fpstate_ia32 fpstate_unused; /* look at kernel/sigframe.h */ unsigned int extramask[_COMPAT_NSIG_WORDS-1]; char retcode[8]; + /* fp state follows here */ }; struct rt_sigframe @@ -192,8 +193,8 @@ struct rt_sigframe u32 puc; compat_siginfo_t info; struct ucontext_ia32 uc; - struct _fpstate_ia32 fpstate; char retcode[8]; + /* fp state follows here */ }; #define COPY(x) { \ @@ -402,7 +403,8 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc, * Determine which stack to use.. */ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, - size_t frame_size) + size_t frame_size, + struct _fpstate_ia32 **fpstate) { unsigned long sp; @@ -421,6 +423,11 @@ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, ka->sa.sa_restorer) sp = (unsigned long) ka->sa.sa_restorer; + if (used_math()) { + sp = sp - sig_xstate_ia32_size; + *fpstate = (struct _fpstate_ia32 *) sp; + } + sp -= frame_size; /* Align the stack pointer according to the i386 ABI, * i.e. so that on function entry ((sp + 4) & 15) == 0. */ @@ -434,6 +441,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka, struct sigframe __user *frame; void __user *restorer; int err = 0; + struct _fpstate_ia32 __user *fpstate = NULL; /* copy_to_user optimizes that into a single 8 byte store */ static const struct { @@ -448,7 +456,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka, 0, }; - frame = get_sigframe(ka, regs, sizeof(*frame)); + frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) goto give_sigsegv; @@ -457,8 +465,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka, if (err) goto give_sigsegv; - err |= ia32_setup_sigcontext(&frame->sc, &frame->fpstate, regs, - set->sig[0]); + err |= ia32_setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0]); if (err) goto give_sigsegv; @@ -522,6 +529,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, struct rt_sigframe __user *frame; void __user *restorer; int err = 0; + struct _fpstate_ia32 __user *fpstate = NULL; /* __copy_to_user optimizes that into a single 8 byte store */ static const struct { @@ -537,7 +545,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, 0, }; - frame = get_sigframe(ka, regs, sizeof(*frame)); + frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) goto give_sigsegv; @@ -556,7 +564,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, err |= __put_user(sas_ss_flags(regs->sp), &frame->uc.uc_stack.ss_flags); err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); - err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate, + err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, fpstate, regs, set->sig[0]); err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); if (err) diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index b778e17..51fb288 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -24,6 +24,7 @@ # define save_i387_ia32 save_i387 # define restore_i387_ia32 restore_i387 # define _fpstate_ia32 _fpstate +# define sig_xstate_ia32_size sig_xstate_size # define user_i387_ia32_struct user_i387_struct # define user32_fxsr_struct user_fxsr_struct #endif @@ -36,6 +37,7 @@ static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; unsigned int xstate_size; +unsigned int sig_xstate_ia32_size = sizeof(struct _fpstate_ia32); static struct i387_fxsave_struct fx_scratch __cpuinitdata; void __cpuinit mxcsr_feature_mask_init(void) diff --git a/arch/x86/kernel/sigframe.h b/arch/x86/kernel/sigframe.h index 72bbb51..6dd7e2b 100644 --- a/arch/x86/kernel/sigframe.h +++ b/arch/x86/kernel/sigframe.h @@ -3,9 +3,18 @@ struct sigframe { char __user *pretcode; int sig; struct sigcontext sc; - struct _fpstate fpstate; + /* + * fpstate is unused. fpstate is moved/allocated after + * retcode[] below. This movement allows to have the FP state and the + * future state extensions (xsave) stay together. + * And at the same time retaining the unused fpstate, prevents changing + * the offset of extramask[] in the sigframe and thus prevent any + * legacy application accessing/modifying it. + */ + struct _fpstate fpstate_unused; unsigned long extramask[_NSIG_WORDS-1]; char retcode[8]; + /* fp state follows here */ }; struct rt_sigframe { @@ -15,13 +24,14 @@ struct rt_sigframe { void __user *puc; struct siginfo info; struct ucontext uc; - struct _fpstate fpstate; char retcode[8]; + /* fp state follows here */ }; #else struct rt_sigframe { char __user *pretcode; struct ucontext uc; struct siginfo info; + /* fp state follows here */ }; #endif diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 6fb5bcd..19a7a56 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -306,7 +306,8 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate, * Determine which stack to use.. */ static inline void __user * -get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size) +get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, + struct _fpstate **fpstate) { unsigned long sp; @@ -332,6 +333,11 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size) sp = (unsigned long) ka->sa.sa_restorer; } + if (used_math()) { + sp = sp - sig_xstate_size; + *fpstate = (struct _fpstate *) sp; + } + sp -= frame_size; /* * Align the stack pointer according to the i386 ABI, @@ -350,8 +356,9 @@ setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, void __user *restorer; int err = 0; int usig; + struct _fpstate __user *fpstate = NULL; - frame = get_sigframe(ka, regs, sizeof(*frame)); + frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) goto give_sigsegv; @@ -366,7 +373,7 @@ setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, if (err) goto give_sigsegv; - err = setup_sigcontext(&frame->sc, &frame->fpstate, regs, set->sig[0]); + err = setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0]); if (err) goto give_sigsegv; @@ -427,8 +434,9 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, void __user *restorer; int err = 0; int usig; + struct _fpstate __user *fpstate = NULL; - frame = get_sigframe(ka, regs, sizeof(*frame)); + frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) goto give_sigsegv; @@ -453,7 +461,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, err |= __put_user(sas_ss_flags(regs->sp), &frame->uc.uc_stack.ss_flags); err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); - err |= setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate, + err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, regs, set->sig[0]); err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); if (err) diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index ca316b5..0deab8e 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -281,7 +281,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, struct task_struct *me = current; if (used_math()) { - fp = get_stack(ka, regs, sizeof(struct _fpstate)); + fp = get_stack(ka, regs, sig_xstate_size); frame = (void __user *)round_down( (unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8; diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index c68b7c4..7ad169e 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -17,6 +17,10 @@ unsigned int pcntxt_hmask, pcntxt_lmask; */ struct xsave_struct *init_xstate_buf; +#ifdef CONFIG_X86_64 +unsigned int sig_xstate_size = sizeof(struct _fpstate); +#endif + /* * Enable the extended processor state save/restore feature */ diff --git a/include/asm-x86/i387.h b/include/asm-x86/i387.h index a6d256f4..36dca8d 100644 --- a/include/asm-x86/i387.h +++ b/include/asm-x86/i387.h @@ -20,6 +20,7 @@ #include #include +extern unsigned int sig_xstate_size; extern void fpu_init(void); extern void mxcsr_feature_mask_init(void); extern int init_fpu(struct task_struct *child); @@ -31,6 +32,7 @@ extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get; extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set; #ifdef CONFIG_IA32_EMULATION +extern unsigned int sig_xstate_ia32_size; struct _fpstate_ia32; extern int save_i387_ia32(struct _fpstate_ia32 __user *buf); extern int restore_i387_ia32(struct _fpstate_ia32 __user *buf); -- cgit v0.10.2 From ab5137015fed9b948fe835a2d99a4cfbd50a0c40 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 29 Jul 2008 10:29:22 -0700 Subject: x86, xsave: reorganization of signal save/restore fpstate code layout move 64bit routines that saves/restores fpstate in/from user stack from signal_64.c to xsave.c restore_i387_xstate() now handles the condition when user passes NULL fpstate. Other misc changes for prepartion of xsave/xrstor sigcontext support. Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index a05bf0f..c596eab 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -216,7 +216,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs, unsigned int *peax) { unsigned int tmpflags, gs, oldgs, err = 0; - struct _fpstate_ia32 __user *buf; + void __user *buf; u32 tmp; /* Always make any pending restarted system calls return -EINTR */ @@ -260,26 +260,12 @@ static int ia32_restore_sigcontext(struct pt_regs *regs, err |= __get_user(tmp, &sc->fpstate); buf = compat_ptr(tmp); - if (buf) { - if (!access_ok(VERIFY_READ, buf, sizeof(*buf))) - goto badframe; - err |= restore_i387_ia32(buf); - } else { - struct task_struct *me = current; - - if (used_math()) { - clear_fpu(me); - clear_used_math(); - } - } + err |= restore_i387_xstate_ia32(buf); err |= __get_user(tmp, &sc->ax); *peax = tmp; return err; - -badframe: - return 1; } asmlinkage long sys32_sigreturn(struct pt_regs *regs) @@ -351,7 +337,7 @@ badframe: */ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc, - struct _fpstate_ia32 __user *fpstate, + void __user *fpstate, struct pt_regs *regs, unsigned int mask) { int tmp, err = 0; @@ -382,7 +368,7 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc, err |= __put_user((u32)regs->flags, &sc->flags); err |= __put_user((u32)regs->sp, &sc->sp_at_signal); - tmp = save_i387_ia32(fpstate); + tmp = save_i387_xstate_ia32(fpstate); if (tmp < 0) err = -EFAULT; else { @@ -404,7 +390,7 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc, */ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, - struct _fpstate_ia32 **fpstate) + void **fpstate) { unsigned long sp; @@ -441,7 +427,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka, struct sigframe __user *frame; void __user *restorer; int err = 0; - struct _fpstate_ia32 __user *fpstate = NULL; + void __user *fpstate = NULL; /* copy_to_user optimizes that into a single 8 byte store */ static const struct { @@ -529,7 +515,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, struct rt_sigframe __user *frame; void __user *restorer; int err = 0; - struct _fpstate_ia32 __user *fpstate = NULL; + void __user *fpstate = NULL; /* __copy_to_user optimizes that into a single 8 byte store */ static const struct { diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 51fb288..7daf3a0 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -21,9 +21,10 @@ # include # include #else -# define save_i387_ia32 save_i387 -# define restore_i387_ia32 restore_i387 +# define save_i387_xstate_ia32 save_i387_xstate +# define restore_i387_xstate_ia32 restore_i387_xstate # define _fpstate_ia32 _fpstate +# define _xstate_ia32 _xstate # define sig_xstate_ia32_size sig_xstate_size # define user_i387_ia32_struct user_i387_struct # define user32_fxsr_struct user_fxsr_struct @@ -424,7 +425,6 @@ static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf) struct task_struct *tsk = current; struct i387_fsave_struct *fp = &tsk->thread.xstate->fsave; - unlazy_fpu(tsk); fp->status = fp->swd; if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct))) return -1; @@ -438,8 +438,6 @@ static int save_i387_fxsave(struct _fpstate_ia32 __user *buf) struct user_i387_ia32_struct env; int err = 0; - unlazy_fpu(tsk); - convert_from_fxsr(&env, tsk); if (__copy_to_user(buf, &env, sizeof(env))) return -1; @@ -455,10 +453,16 @@ static int save_i387_fxsave(struct _fpstate_ia32 __user *buf) return 1; } -int save_i387_ia32(struct _fpstate_ia32 __user *buf) +int save_i387_xstate_ia32(void __user *buf) { + struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf; + struct task_struct *tsk = current; + if (!used_math()) return 0; + + if (!access_ok(VERIFY_WRITE, buf, sig_xstate_ia32_size)) + return -EACCES; /* * This will cause a "finit" to be triggered by the next * attempted FPU operation by the 'current' process. @@ -468,13 +472,15 @@ int save_i387_ia32(struct _fpstate_ia32 __user *buf) if (!HAVE_HWFP) { return fpregs_soft_get(current, NULL, 0, sizeof(struct user_i387_ia32_struct), - NULL, buf) ? -1 : 1; + NULL, fp) ? -1 : 1; } + unlazy_fpu(tsk); + if (cpu_has_fxsr) - return save_i387_fxsave(buf); + return save_i387_fxsave(fp); else - return save_i387_fsave(buf); + return save_i387_fsave(fp); } static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf) @@ -502,14 +508,26 @@ static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf) return 0; } -int restore_i387_ia32(struct _fpstate_ia32 __user *buf) +int restore_i387_xstate_ia32(void __user *buf) { int err; struct task_struct *tsk = current; + struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf; if (HAVE_HWFP) clear_fpu(tsk); + if (!buf) { + if (used_math()) { + clear_fpu(tsk); + clear_used_math(); + } + + return 0; + } else + if (!access_ok(VERIFY_READ, buf, sig_xstate_ia32_size)) + return -EACCES; + if (!used_math()) { err = init_fpu(tsk); if (err) @@ -518,13 +536,13 @@ int restore_i387_ia32(struct _fpstate_ia32 __user *buf) if (HAVE_HWFP) { if (cpu_has_fxsr) - err = restore_i387_fxsave(buf); + err = restore_i387_fxsave(fp); else - err = restore_i387_fsave(buf); + err = restore_i387_fsave(fp); } else { err = fpregs_soft_set(current, NULL, 0, sizeof(struct user_i387_ia32_struct), - NULL, buf) != 0; + NULL, fp) != 0; } set_used_math(); diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 19a7a56..690cc61 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -159,28 +159,14 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, } { - struct _fpstate __user *buf; + void __user *buf; err |= __get_user(buf, &sc->fpstate); - if (buf) { - if (!access_ok(VERIFY_READ, buf, sizeof(*buf))) - goto badframe; - err |= restore_i387(buf); - } else { - struct task_struct *me = current; - - if (used_math()) { - clear_fpu(me); - clear_used_math(); - } - } + err |= restore_i387_xstate(buf); } err |= __get_user(*pax, &sc->ax); return err; - -badframe: - return 1; } asmlinkage unsigned long sys_sigreturn(unsigned long __unused) @@ -262,7 +248,7 @@ badframe: * Set up a signal frame. */ static int -setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate, +setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, struct pt_regs *regs, unsigned long mask) { int tmp, err = 0; @@ -289,7 +275,7 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate, err |= __put_user(regs->sp, &sc->sp_at_signal); err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss); - tmp = save_i387(fpstate); + tmp = save_i387_xstate(fpstate); if (tmp < 0) err = 1; else @@ -307,7 +293,7 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate, */ static inline void __user * get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, - struct _fpstate **fpstate) + void **fpstate) { unsigned long sp; @@ -356,7 +342,7 @@ setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, void __user *restorer; int err = 0; int usig; - struct _fpstate __user *fpstate = NULL; + void __user *fpstate = NULL; frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); @@ -434,7 +420,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, void __user *restorer; int err = 0; int usig; - struct _fpstate __user *fpstate = NULL; + void __user *fpstate = NULL; frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index 0deab8e..ddf6123 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -54,69 +54,6 @@ sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, } /* - * Signal frame handlers. - */ - -static inline int save_i387(struct _fpstate __user *buf) -{ - struct task_struct *tsk = current; - int err = 0; - - BUILD_BUG_ON(sizeof(struct user_i387_struct) != - sizeof(tsk->thread.xstate->fxsave)); - - if ((unsigned long)buf % 16) - printk("save_i387: bad fpstate %p\n", buf); - - if (!used_math()) - return 0; - clear_used_math(); /* trigger finit */ - if (task_thread_info(tsk)->status & TS_USEDFPU) { - err = save_i387_checking((struct i387_fxsave_struct __user *) - buf); - if (err) - return err; - task_thread_info(tsk)->status &= ~TS_USEDFPU; - stts(); - } else { - if (__copy_to_user(buf, &tsk->thread.xstate->fxsave, - sizeof(struct i387_fxsave_struct))) - return -1; - } - return 1; -} - -/* - * This restores directly out of user space. Exceptions are handled. - */ -static inline int restore_i387(struct _fpstate __user *buf) -{ - struct task_struct *tsk = current; - int err; - - if (!used_math()) { - err = init_fpu(tsk); - if (err) - return err; - } - - if (!(task_thread_info(current)->status & TS_USEDFPU)) { - clts(); - task_thread_info(current)->status |= TS_USEDFPU; - } - err = restore_fpu_checking((__force struct i387_fxsave_struct *)buf); - if (unlikely(err)) { - /* - * Encountered an error while doing the restore from the - * user buffer, clear the fpu state. - */ - clear_fpu(tsk); - clear_used_math(); - } - return err; -} - -/* * Do a signal return; undo the signal stack. */ static int @@ -160,25 +97,11 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, { struct _fpstate __user * buf; err |= __get_user(buf, &sc->fpstate); - - if (buf) { - if (!access_ok(VERIFY_READ, buf, sizeof(*buf))) - goto badframe; - err |= restore_i387(buf); - } else { - struct task_struct *me = current; - if (used_math()) { - clear_fpu(me); - clear_used_math(); - } - } + err |= restore_i387_xstate(buf); } err |= __get_user(*pax, &sc->ax); return err; - -badframe: - return 1; } asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) @@ -276,7 +199,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, struct pt_regs * regs) { struct rt_sigframe __user *frame; - struct _fpstate __user *fp = NULL; + void __user *fp = NULL; int err = 0; struct task_struct *me = current; @@ -288,7 +211,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, if (!access_ok(VERIFY_WRITE, fp, sizeof(struct _fpstate))) goto give_sigsegv; - if (save_i387(fp) < 0) + if (save_i387_xstate(fp) < 0) err |= -1; } else frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8; diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 7ad169e..608e72d 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -12,6 +12,85 @@ */ unsigned int pcntxt_hmask, pcntxt_lmask; +#ifdef CONFIG_X86_64 +/* + * Signal frame handlers. + */ + +int save_i387_xstate(void __user *buf) +{ + struct task_struct *tsk = current; + int err = 0; + + if (!access_ok(VERIFY_WRITE, buf, sig_xstate_size)) + return -EACCES; + + BUILD_BUG_ON(sizeof(struct user_i387_struct) != + sizeof(tsk->thread.xstate->fxsave)); + + if ((unsigned long)buf % 16) + printk("save_i387_xstate: bad fpstate %p\n", buf); + + if (!used_math()) + return 0; + clear_used_math(); /* trigger finit */ + if (task_thread_info(tsk)->status & TS_USEDFPU) { + err = save_i387_checking((struct i387_fxsave_struct __user *) + buf); + if (err) + return err; + task_thread_info(tsk)->status &= ~TS_USEDFPU; + stts(); + } else { + if (__copy_to_user(buf, &tsk->thread.xstate->fxsave, + xstate_size)) + return -1; + } + return 1; +} + +/* + * This restores directly out of user space. Exceptions are handled. + */ +int restore_i387_xstate(void __user *buf) +{ + struct task_struct *tsk = current; + int err; + + if (!buf) { + if (used_math()) { + clear_fpu(tsk); + clear_used_math(); + } + + return 0; + } else + if (!access_ok(VERIFY_READ, buf, sig_xstate_size)) + return -EACCES; + + if (!used_math()) { + err = init_fpu(tsk); + if (err) + return err; + } + + if (!(task_thread_info(current)->status & TS_USEDFPU)) { + clts(); + task_thread_info(current)->status |= TS_USEDFPU; + } + err = fxrstor_checking((__force struct i387_fxsave_struct *)buf); + if (unlikely(err)) { + /* + * Encountered an error while doing the restore from the + * user buffer, clear the fpu state. + */ + clear_fpu(tsk); + clear_used_math(); + } + return err; +} +#endif + /* * Represents init state for the supported extended state. */ diff --git a/include/asm-x86/i387.h b/include/asm-x86/i387.h index 36dca8d..dc3745e 100644 --- a/include/asm-x86/i387.h +++ b/include/asm-x86/i387.h @@ -34,8 +34,9 @@ extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set; #ifdef CONFIG_IA32_EMULATION extern unsigned int sig_xstate_ia32_size; struct _fpstate_ia32; -extern int save_i387_ia32(struct _fpstate_ia32 __user *buf); -extern int restore_i387_ia32(struct _fpstate_ia32 __user *buf); +struct _xstate_ia32; +extern int save_i387_xstate_ia32(void __user *buf); +extern int restore_i387_xstate_ia32(void __user *buf); #endif #define X87_FSW_ES (1 << 7) /* Exception Summary */ @@ -249,13 +250,13 @@ end: task_thread_info(tsk)->status &= ~TS_USEDFPU; } +#endif /* CONFIG_X86_64 */ + /* * Signal frame handlers... */ -extern int save_i387(struct _fpstate __user *buf); -extern int restore_i387(struct _fpstate __user *buf); - -#endif /* CONFIG_X86_64 */ +extern int save_i387_xstate(void __user *buf); +extern int restore_i387_xstate(void __user *buf); static inline void __unlazy_fpu(struct task_struct *tsk) { -- cgit v0.10.2 From 9dc89c0f96a6ce6a1b7f9a47dd8bf6f17e2002c9 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 29 Jul 2008 10:29:23 -0700 Subject: x86, xsave: xsave/xrstor specific routines Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar diff --git a/include/asm-x86/xsave.h b/include/asm-x86/xsave.h index e835a91..b716511 100644 --- a/include/asm-x86/xsave.h +++ b/include/asm-x86/xsave.h @@ -48,6 +48,58 @@ static inline int xrstor_checking(struct xsave_struct *fx) return err; } +static inline int xsave_check(struct xsave_struct __user *buf) +{ + int err; + __asm__ __volatile__("1: .byte " REX_PREFIX "0x0f,0xae,0x27\n" + "2:\n" + ".section .fixup,\"ax\"\n" + "3: movl $-1,%[err]\n" + " jmp 2b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + _ASM_ALIGN "\n" + _ASM_PTR "1b,3b\n" + ".previous" + : [err] "=r" (err) + : "D" (buf), "a" (-1), "d" (-1), "0" (0) + : "memory"); + if (unlikely(err) && __clear_user(buf, xstate_size)) + err = -EFAULT; + /* No need to clear here because the caller clears USED_MATH */ + return err; +} + +static inline int xrestore_user(struct xsave_struct __user *buf, + unsigned int lmask, + unsigned int hmask) +{ + int err; + struct xsave_struct *xstate = ((__force struct xsave_struct *)buf); + + __asm__ __volatile__("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n" + "2:\n" + ".section .fixup,\"ax\"\n" + "3: movl $-1,%[err]\n" + " jmp 2b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + _ASM_ALIGN "\n" + _ASM_PTR "1b,3b\n" + ".previous" + : [err] "=r" (err) + : "D" (xstate), "a" (lmask), "d" (hmask), "0" (0) + : "memory"); /* memory required? */ + return err; +} + +static inline void xrstor_state(struct xsave_struct *fx, int lmask, int hmask) +{ + asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x2f\n\t" + : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) + : "memory"); +} + static inline void xsave(struct task_struct *tsk) { /* This, however, we can work around by forcing the compiler to select -- cgit v0.10.2 From bdd8caba5ed5bb7ee21c9f061597284ffeb280bf Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 29 Jul 2008 10:29:24 -0700 Subject: x86, xsave: struct _fpstate extensions to include extended state information Bytes 464..511 in the current 512byte layout of fxsave/fxrstor frame, are reserved for SW usage. On cpu's supporting xsave/xrstor, these bytes are used to extended the fpstate pointer in the sigcontext, which now includes the extended state information along with fpstate information. Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h index 77b7af6..eb4bd8c 100644 --- a/include/asm-x86/processor.h +++ b/include/asm-x86/processor.h @@ -322,7 +322,12 @@ struct i387_fxsave_struct { /* 16*16 bytes for each XMM-reg = 256 bytes: */ u32 xmm_space[64]; - u32 padding[24]; + u32 padding[12]; + + union { + u32 padding1[12]; + u32 sw_reserved[12]; + }; } __attribute__((aligned(16))); diff --git a/include/asm-x86/sigcontext.h b/include/asm-x86/sigcontext.h index 24879c8..899fe2f 100644 --- a/include/asm-x86/sigcontext.h +++ b/include/asm-x86/sigcontext.h @@ -4,6 +4,40 @@ #include #include +#define FP_XSTATE_MAGIC1 0x46505853U +#define FP_XSTATE_MAGIC2 0x46505845U +#define FP_XSTATE_MAGIC2_SIZE sizeof(FP_XSTATE_MAGIC2) + +/* + * bytes 464..511 in the current 512byte layout of fxsave/fxrstor frame + * are reserved for SW usage. On cpu's supporting xsave/xrstor, these bytes + * are used to extended the fpstate pointer in the sigcontext, which now + * includes the extended state information along with fpstate information. + * + * Presence of FP_XSTATE_MAGIC1 at the beginning of this SW reserved + * area and FP_XSTATE_MAGIC2 at the end of memory layout + * (extended_size - FP_XSTATE_MAGIC2_SIZE) indicates the presence of the + * extended state information in the memory layout pointed by the fpstate + * pointer in sigcontext. + */ +struct _fpx_sw_bytes { + __u32 magic1; /* FP_XSTATE_MAGIC1 */ + __u32 extended_size; /* total size of the layout referred by + * fpstate pointer in the sigcontext. + */ + __u64 xstate_bv; + /* feature bit mask (including fp/sse/extended + * state) that is present in the memory + * layout. + */ + __u32 xstate_size; /* actual xsave state size, based on the + * features saved in the layout. + * 'extended_size' will be greater than + * 'xstate_size'. + */ + __u32 padding[7]; /* for future use. */ +}; + #ifdef __i386__ /* * As documented in the iBCS2 standard.. @@ -53,7 +87,13 @@ struct _fpstate { unsigned long reserved; struct _fpxreg _fxsr_st[8]; /* FXSR FPU reg data is ignored */ struct _xmmreg _xmm[8]; - unsigned long padding[56]; + unsigned long padding1[44]; + + union { + unsigned long padding2[12]; + struct _fpx_sw_bytes sw_reserved; /* represents the extended + * state info */ + }; }; #define X86_FXSR_MAGIC 0x0000 @@ -79,7 +119,15 @@ struct sigcontext { unsigned long flags; unsigned long sp_at_signal; unsigned short ss, __ssh; - struct _fpstate __user *fpstate; + + /* + * fpstate is really (struct _fpstate *) or (struct _xstate *) + * depending on the FP_XSTATE_MAGIC1 encoded in the SW reserved + * bytes of (struct _fpstate) and FP_XSTATE_MAGIC2 present at the end + * of extended memory layout. See comments at the defintion of + * (struct _fpx_sw_bytes) + */ + void __user *fpstate; /* zero when no FPU/extended context */ unsigned long oldmask; unsigned long cr2; }; @@ -130,7 +178,12 @@ struct _fpstate { __u32 mxcsr_mask; __u32 st_space[32]; /* 8*16 bytes for each FP-reg */ __u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg */ - __u32 reserved2[24]; + __u32 reserved2[12]; + union { + __u32 reserved3[12]; + struct _fpx_sw_bytes sw_reserved; /* represents the extended + * state information */ + }; }; #ifdef __KERNEL__ @@ -161,7 +214,15 @@ struct sigcontext { unsigned long trapno; unsigned long oldmask; unsigned long cr2; - struct _fpstate __user *fpstate; /* zero when no FPU context */ + + /* + * fpstate is really (struct _fpstate *) or (struct _xstate *) + * depending on the FP_XSTATE_MAGIC1 encoded in the SW reserved + * bytes of (struct _fpstate) and FP_XSTATE_MAGIC2 present at the end + * of extended memory layout. See comments at the defintion of + * (struct _fpx_sw_bytes) + */ + void __user *fpstate; /* zero when no FPU/extended context */ unsigned long reserved1[8]; }; #else /* __KERNEL__ */ @@ -202,4 +263,22 @@ struct sigcontext { #endif /* !__i386__ */ +struct _xsave_hdr { + u64 xstate_bv; + u64 reserved1[2]; + u64 reserved2[5]; +}; + +/* + * Extended state pointed by the fpstate pointer in the sigcontext. + * In addition to the fpstate, information encoded in the xstate_hdr + * indicates the presence of other extended state information + * supported by the processor and OS. + */ +struct _xstate { + struct _fpstate fpstate; + struct _xsave_hdr xstate_hdr; + /* new processor state extensions go here */ +}; + #endif /* ASM_X86__SIGCONTEXT_H */ diff --git a/include/asm-x86/sigcontext32.h b/include/asm-x86/sigcontext32.h index 4e2ec73..8c34703 100644 --- a/include/asm-x86/sigcontext32.h +++ b/include/asm-x86/sigcontext32.h @@ -40,7 +40,11 @@ struct _fpstate_ia32 { __u32 reserved; struct _fpxreg _fxsr_st[8]; struct _xmmreg _xmm[8]; /* It's actually 16 */ - __u32 padding[56]; + __u32 padding[44]; + union { + __u32 padding2[12]; + struct _fpx_sw_bytes sw_reserved; + }; }; struct sigcontext_ia32 { -- cgit v0.10.2 From c37b5efea43f9e500363f9973dd00e3d2cdcc685 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 29 Jul 2008 10:29:25 -0700 Subject: x86, xsave: save/restore the extended state context in sigframe On cpu's supporting xsave/xrstor, fpstate pointer in the sigcontext, will include the extended state information along with fpstate information. Presence of extended state information is indicated by the presence of FP_XSTATE_MAGIC1 at fpstate.sw_reserved.magic1 and FP_XSTATE_MAGIC2 at fpstate + (fpstate.sw_reserved.extended_size - FP_XSTATE_MAGIC2_SIZE). Extended feature bit mask that is saved in the memory layout is represented by the fpstate.sw_reserved.xstate_bv For RT signal frames, UC_FP_XSTATE in the uc_flags also indicate the presence of extended state information in the sigcontext's fpstate pointer. Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index c596eab..f25a101 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -544,7 +544,10 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, goto give_sigsegv; /* Create the ucontext. */ - err |= __put_user(0, &frame->uc.uc_flags); + if (cpu_has_xsave) + err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags); + else + err |= __put_user(0, &frame->uc.uc_flags); err |= __put_user(0, &frame->uc.uc_link); err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); err |= __put_user(sas_ss_flags(regs->sp), diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 7daf3a0..cbb9dc4 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -26,6 +26,7 @@ # define _fpstate_ia32 _fpstate # define _xstate_ia32 _xstate # define sig_xstate_ia32_size sig_xstate_size +# define fx_sw_reserved_ia32 fx_sw_reserved # define user_i387_ia32_struct user_i387_struct # define user32_fxsr_struct user_fxsr_struct #endif @@ -447,12 +448,30 @@ static int save_i387_fxsave(struct _fpstate_ia32 __user *buf) if (err) return -1; - if (__copy_to_user(&buf->_fxsr_env[0], fx, - sizeof(struct i387_fxsave_struct))) + if (__copy_to_user(&buf->_fxsr_env[0], fx, xstate_size)) return -1; return 1; } +static int save_i387_xsave(void __user *buf) +{ + struct _fpstate_ia32 __user *fx = buf; + int err = 0; + + if (save_i387_fxsave(fx) < 0) + return -1; + + err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved_ia32, + sizeof(struct _fpx_sw_bytes)); + err |= __put_user(FP_XSTATE_MAGIC2, + (__u32 __user *) (buf + sig_xstate_ia32_size + - FP_XSTATE_MAGIC2_SIZE)); + if (err) + return -1; + + return 1; +} + int save_i387_xstate_ia32(void __user *buf) { struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf; @@ -477,6 +496,8 @@ int save_i387_xstate_ia32(void __user *buf) unlazy_fpu(tsk); + if (cpu_has_xsave) + return save_i387_xsave(fp); if (cpu_has_fxsr) return save_i387_fxsave(fp); else @@ -491,14 +512,15 @@ static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf) sizeof(struct i387_fsave_struct)); } -static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf) +static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf, + unsigned int size) { struct task_struct *tsk = current; struct user_i387_ia32_struct env; int err; err = __copy_from_user(&tsk->thread.xstate->fxsave, &buf->_fxsr_env[0], - sizeof(struct i387_fxsave_struct)); + size); /* mxcsr reserved bits must be masked to zero for security reasons */ tsk->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask; if (err || __copy_from_user(&env, buf, sizeof(env))) @@ -508,6 +530,51 @@ static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf) return 0; } +static int restore_i387_xsave(void __user *buf) +{ + struct _fpx_sw_bytes fx_sw_user; + struct _fpstate_ia32 __user *fx_user = + ((struct _fpstate_ia32 __user *) buf); + struct i387_fxsave_struct __user *fx = + (struct i387_fxsave_struct __user *) &fx_user->_fxsr_env[0]; + struct xsave_hdr_struct *xsave_hdr = + ¤t->thread.xstate->xsave.xsave_hdr; + unsigned int lmask, hmask; + int err; + + if (check_for_xstate(fx, buf, &fx_sw_user)) + goto fx_only; + + lmask = fx_sw_user.xstate_bv; + hmask = fx_sw_user.xstate_bv >> 32; + + err = restore_i387_fxsave(buf, fx_sw_user.xstate_size); + + xsave_hdr->xstate_bv &= (pcntxt_lmask | (((u64) pcntxt_hmask) << 32)); + /* + * These bits must be zero. + */ + xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0; + + /* + * Init the state that is not present in the memory layout + * and enabled by the OS. + */ + lmask = ~(pcntxt_lmask & ~lmask); + hmask = ~(pcntxt_hmask & ~hmask); + xsave_hdr->xstate_bv &= (lmask | (((u64) hmask) << 32)); + + return err; +fx_only: + /* + * Couldn't find the extended state information in the memory + * layout. Restore the FP/SSE and init the other extended state + * enabled by the OS. + */ + xsave_hdr->xstate_bv = XSTATE_FPSSE; + return restore_i387_fxsave(buf, sizeof(struct i387_fxsave_struct)); +} + int restore_i387_xstate_ia32(void __user *buf) { int err; @@ -535,8 +602,11 @@ int restore_i387_xstate_ia32(void __user *buf) } if (HAVE_HWFP) { - if (cpu_has_fxsr) - err = restore_i387_fxsave(fp); + if (cpu_has_xsave) + err = restore_i387_xsave(buf); + else if (cpu_has_fxsr) + err = restore_i387_fxsave(fp, sizeof(struct + i387_fxsave_struct)); else err = restore_i387_fsave(fp); } else { diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 690cc61..0f98d69 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -441,7 +441,10 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, goto give_sigsegv; /* Create the ucontext. */ - err |= __put_user(0, &frame->uc.uc_flags); + if (cpu_has_xsave) + err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags); + else + err |= __put_user(0, &frame->uc.uc_flags); err |= __put_user(0, &frame->uc.uc_link); err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); err |= __put_user(sas_ss_flags(regs->sp), diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index ddf6123..2621b98 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -192,7 +192,7 @@ get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size) sp = current->sas_ss_sp + current->sas_ss_size; } - return (void __user *)round_down(sp - size, 16); + return (void __user *)round_down(sp - size, 64); } static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, @@ -226,7 +226,10 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, } /* Create the ucontext. */ - err |= __put_user(0, &frame->uc.uc_flags); + if (cpu_has_xsave) + err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags); + else + err |= __put_user(0, &frame->uc.uc_flags); err |= __put_user(0, &frame->uc.uc_link); err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp); err |= __put_user(sas_ss_flags(regs->sp), diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 608e72d..dd66d07 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -6,12 +6,68 @@ #include #include #include +#ifdef CONFIG_IA32_EMULATION +#include +#endif /* * Supported feature mask by the CPU and the kernel. */ unsigned int pcntxt_hmask, pcntxt_lmask; +struct _fpx_sw_bytes fx_sw_reserved; +#ifdef CONFIG_IA32_EMULATION +struct _fpx_sw_bytes fx_sw_reserved_ia32; +#endif + +/* + * Check for the presence of extended state information in the + * user fpstate pointer in the sigcontext. + */ +int check_for_xstate(struct i387_fxsave_struct __user *buf, + void __user *fpstate, + struct _fpx_sw_bytes *fx_sw_user) +{ + int min_xstate_size = sizeof(struct i387_fxsave_struct) + + sizeof(struct xsave_hdr_struct); + unsigned int magic2; + int err; + + err = __copy_from_user(fx_sw_user, &buf->sw_reserved[0], + sizeof(struct _fpx_sw_bytes)); + + if (err) + return err; + + /* + * First Magic check failed. + */ + if (fx_sw_user->magic1 != FP_XSTATE_MAGIC1) + return -1; + + /* + * Check for error scenarios. + */ + if (fx_sw_user->xstate_size < min_xstate_size || + fx_sw_user->xstate_size > xstate_size || + fx_sw_user->xstate_size > fx_sw_user->extended_size) + return -1; + + err = __get_user(magic2, (__u32 *) (((void *)fpstate) + + fx_sw_user->extended_size - + FP_XSTATE_MAGIC2_SIZE)); + /* + * Check for the presence of second magic word at the end of memory + * layout. This detects the case where the user just copied the legacy + * fpstate layout with out copying the extended state information + * in the memory layout. + */ + if (err || magic2 != FP_XSTATE_MAGIC2) + return -1; + + return 0; +} + #ifdef CONFIG_X86_64 /* * Signal frame handlers. @@ -28,15 +84,18 @@ int save_i387_xstate(void __user *buf) BUILD_BUG_ON(sizeof(struct user_i387_struct) != sizeof(tsk->thread.xstate->fxsave)); - if ((unsigned long)buf % 16) + if ((unsigned long)buf % 64) printk("save_i387_xstate: bad fpstate %p\n", buf); if (!used_math()) return 0; clear_used_math(); /* trigger finit */ if (task_thread_info(tsk)->status & TS_USEDFPU) { - err = save_i387_checking((struct i387_fxsave_struct __user *) - buf); + if (task_thread_info(tsk)->status & TS_XSAVE) + err = xsave_user(buf); + else + err = fxsave_user(buf); + if (err) return err; task_thread_info(tsk)->status &= ~TS_USEDFPU; @@ -46,23 +105,77 @@ int save_i387_xstate(void __user *buf) xstate_size)) return -1; } + + if (task_thread_info(tsk)->status & TS_XSAVE) { + struct _fpstate __user *fx = buf; + + err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved, + sizeof(struct _fpx_sw_bytes)); + + err |= __put_user(FP_XSTATE_MAGIC2, + (__u32 __user *) (buf + sig_xstate_size + - FP_XSTATE_MAGIC2_SIZE)); + } + return 1; } /* + * Restore the extended state if present. Otherwise, restore the FP/SSE + * state. + */ +int restore_user_xstate(void __user *buf) +{ + struct _fpx_sw_bytes fx_sw_user; + unsigned int lmask, hmask; + int err; + + if (((unsigned long)buf % 64) || + check_for_xstate(buf, buf, &fx_sw_user)) + goto fx_only; + + lmask = fx_sw_user.xstate_bv; + hmask = fx_sw_user.xstate_bv >> 32; + + /* + * restore the state passed by the user. + */ + err = xrestore_user(buf, lmask, hmask); + if (err) + return err; + + /* + * init the state skipped by the user. + */ + lmask = pcntxt_lmask & ~lmask; + hmask = pcntxt_hmask & ~hmask; + + xrstor_state(init_xstate_buf, lmask, hmask); + + return 0; + +fx_only: + /* + * couldn't find the extended state information in the + * memory layout. Restore just the FP/SSE and init all + * the other extended state. + */ + xrstor_state(init_xstate_buf, pcntxt_lmask & ~XSTATE_FPSSE, + pcntxt_hmask); + return fxrstor_checking((__force struct i387_fxsave_struct *)buf); +} + +/* * This restores directly out of user space. Exceptions are handled. */ int restore_i387_xstate(void __user *buf) { struct task_struct *tsk = current; - int err; + int err = 0; if (!buf) { - if (used_math()) { - clear_fpu(tsk); - clear_used_math(); - } - + if (used_math()) + goto clear; return 0; } else if (!access_ok(VERIFY_READ, buf, sig_xstate_size)) @@ -78,12 +191,17 @@ int restore_i387_xstate(void __user *buf) clts(); task_thread_info(current)->status |= TS_USEDFPU; } - err = fxrstor_checking((__force struct i387_fxsave_struct *)buf); + if (task_thread_info(tsk)->status & TS_XSAVE) + err = restore_user_xstate(buf); + else + err = fxrstor_checking((__force struct i387_fxsave_struct *) + buf); if (unlikely(err)) { /* * Encountered an error while doing the restore from the * user buffer, clear the fpu state. */ +clear: clear_fpu(tsk); clear_used_math(); } @@ -92,6 +210,38 @@ int restore_i387_xstate(void __user *buf) #endif /* + * Prepare the SW reserved portion of the fxsave memory layout, indicating + * the presence of the extended state information in the memory layout + * pointed by the fpstate pointer in the sigcontext. + * This will be saved when ever the FP and extended state context is + * saved on the user stack during the signal handler delivery to the user. + */ +void prepare_fx_sw_frame(void) +{ + int size_extended = (xstate_size - sizeof(struct i387_fxsave_struct)) + + FP_XSTATE_MAGIC2_SIZE; + + sig_xstate_size = sizeof(struct _fpstate) + size_extended; + +#ifdef CONFIG_IA32_EMULATION + sig_xstate_ia32_size = sizeof(struct _fpstate_ia32) + size_extended; +#endif + + memset(&fx_sw_reserved, 0, sizeof(fx_sw_reserved)); + + fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1; + fx_sw_reserved.extended_size = sig_xstate_size; + fx_sw_reserved.xstate_bv = pcntxt_lmask | + (((u64) (pcntxt_hmask)) << 32); + fx_sw_reserved.xstate_size = xstate_size; +#ifdef CONFIG_IA32_EMULATION + memcpy(&fx_sw_reserved_ia32, &fx_sw_reserved, + sizeof(struct _fpx_sw_bytes)); + fx_sw_reserved_ia32.extended_size = sig_xstate_ia32_size; +#endif +} + +/* * Represents init state for the supported extended state. */ struct xsave_struct *init_xstate_buf; @@ -162,6 +312,8 @@ void __init xsave_cntxt_init(void) xstate_size = ebx; + prepare_fx_sw_frame(); + setup_xstate_init(); printk(KERN_INFO "xsave/xrstor: enabled xstate_bv 0x%Lx, " diff --git a/include/asm-x86/i387.h b/include/asm-x86/i387.h index dc3745e..d3dda71 100644 --- a/include/asm-x86/i387.h +++ b/include/asm-x86/i387.h @@ -31,8 +31,10 @@ extern user_regset_active_fn fpregs_active, xfpregs_active; extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get; extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set; +extern struct _fpx_sw_bytes fx_sw_reserved; #ifdef CONFIG_IA32_EMULATION extern unsigned int sig_xstate_ia32_size; +extern struct _fpx_sw_bytes fx_sw_reserved_ia32; struct _fpstate_ia32; struct _xstate_ia32; extern int save_i387_xstate_ia32(void __user *buf); @@ -104,7 +106,7 @@ static inline void clear_fpu_state(struct task_struct *tsk) X86_FEATURE_FXSAVE_LEAK); } -static inline int save_i387_checking(struct i387_fxsave_struct __user *fx) +static inline int fxsave_user(struct i387_fxsave_struct __user *fx) { int err; diff --git a/include/asm-x86/ucontext.h b/include/asm-x86/ucontext.h index 9948dd3..89eaa54 100644 --- a/include/asm-x86/ucontext.h +++ b/include/asm-x86/ucontext.h @@ -1,6 +1,12 @@ #ifndef ASM_X86__UCONTEXT_H #define ASM_X86__UCONTEXT_H +#define UC_FP_XSTATE 0x1 /* indicates the presence of extended state + * information in the memory layout pointed + * by the fpstate pointer in the ucontext's + * sigcontext struct (uc_mcontext). + */ + struct ucontext { unsigned long uc_flags; struct ucontext *uc_link; diff --git a/include/asm-x86/xsave.h b/include/asm-x86/xsave.h index b716511..b7f64b9 100644 --- a/include/asm-x86/xsave.h +++ b/include/asm-x86/xsave.h @@ -29,6 +29,9 @@ extern struct xsave_struct *init_xstate_buf; extern void xsave_cntxt_init(void); extern void xsave_init(void); extern int init_fpu(struct task_struct *child); +extern int check_for_xstate(struct i387_fxsave_struct __user *buf, + void __user *fpstate, + struct _fpx_sw_bytes *sw); static inline int xrstor_checking(struct xsave_struct *fx) { @@ -48,7 +51,7 @@ static inline int xrstor_checking(struct xsave_struct *fx) return err; } -static inline int xsave_check(struct xsave_struct __user *buf) +static inline int xsave_user(struct xsave_struct __user *buf) { int err; __asm__ __volatile__("1: .byte " REX_PREFIX "0x0f,0xae,0x27\n" -- cgit v0.10.2 From 42deec6f2c3688fdaf986225ac901b817cd91568 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 29 Jul 2008 10:29:26 -0700 Subject: x86, xsave: update xsave header bits during ptrace fpregs set FP/SSE bits may be zero in the xsave header(representing the init state). Update these bits during the ptrace fpregs set operation, to indicate the non-init state. Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index cbb9dc4..e0ed59f 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -214,6 +214,13 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, */ target->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask; + /* + * update the header bits in the xsave header, indicating the + * presence of FP and SSE state. + */ + if (cpu_has_xsave) + target->thread.xstate->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE; + return ret; } @@ -414,6 +421,12 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, if (!ret) convert_to_fxsr(target, &env); + /* + * update the header bit in the xsave header, indicating the + * presence of FP. + */ + if (cpu_has_xsave) + target->thread.xstate->xsave.xsave_hdr.xstate_bv |= XSTATE_FP; return ret; } -- cgit v0.10.2 From b4a091a62c8e91d6077e575600363cff73fa02ef Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 29 Jul 2008 17:30:29 -0700 Subject: x86, xsave: add header file for XCR registers Add header file for the XCR registers and their access functions. Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar diff --git a/include/asm-x86/xcr.h b/include/asm-x86/xcr.h new file mode 100644 index 0000000..f2cba4e7 --- /dev/null +++ b/include/asm-x86/xcr.h @@ -0,0 +1,49 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright 2008 rPath, Inc. - All Rights Reserved + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2 or (at your + * option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * asm-x86/xcr.h + * + * Definitions for the eXtended Control Register instructions + */ + +#ifndef _ASM_X86_XCR_H +#define _ASM_X86_XCR_H + +#define XCR_XFEATURE_ENABLED_MASK 0x00000000 + +#ifdef __KERNEL__ +# ifndef __ASSEMBLY__ + +#include + +static inline u64 xgetbv(u32 index) +{ + u32 eax, edx; + + asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv */ + : "=a" (eax), "=d" (edx) + : "c" (index)); + return eax + ((u64)edx << 32); +} + +static inline void xsetbv(u32 index, u64 value) +{ + u32 eax = value; + u32 edx = value >> 32; + + asm volatile(".byte 0x0f,0x01,0xd1" /* xsetbv */ + : : "a" (eax), "d" (edx), "c" (index)); +} + +# endif /* __ASSEMBLY__ */ +#endif /* __KERNEL__ */ + +#endif /* _ASM_X86_XCR_H */ -- cgit v0.10.2 From 6152e4b1c99a3689fc318d092cd144597f7dbd14 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 29 Jul 2008 17:23:16 -0700 Subject: x86, xsave: keep the XSAVE feature mask as an u64 The XSAVE feature mask is a 64-bit number; keep it that way, in order to avoid the mistake done with rdmsr/wrmsr. Use the xsetbv() function provided in the previous patch. Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index e0ed59f..45723f1 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -552,18 +552,17 @@ static int restore_i387_xsave(void __user *buf) (struct i387_fxsave_struct __user *) &fx_user->_fxsr_env[0]; struct xsave_hdr_struct *xsave_hdr = ¤t->thread.xstate->xsave.xsave_hdr; - unsigned int lmask, hmask; + u64 mask; int err; if (check_for_xstate(fx, buf, &fx_sw_user)) goto fx_only; - lmask = fx_sw_user.xstate_bv; - hmask = fx_sw_user.xstate_bv >> 32; + mask = fx_sw_user.xstate_bv; err = restore_i387_fxsave(buf, fx_sw_user.xstate_size); - xsave_hdr->xstate_bv &= (pcntxt_lmask | (((u64) pcntxt_hmask) << 32)); + xsave_hdr->xstate_bv &= pcntxt_mask; /* * These bits must be zero. */ @@ -573,9 +572,8 @@ static int restore_i387_xsave(void __user *buf) * Init the state that is not present in the memory layout * and enabled by the OS. */ - lmask = ~(pcntxt_lmask & ~lmask); - hmask = ~(pcntxt_hmask & ~hmask); - xsave_hdr->xstate_bv &= (lmask | (((u64) hmask) << 32)); + mask = ~(pcntxt_mask & ~mask); + xsave_hdr->xstate_bv &= mask; return err; fx_only: diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index dd66d07..7415f3e 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -9,11 +9,12 @@ #ifdef CONFIG_IA32_EMULATION #include #endif +#include /* * Supported feature mask by the CPU and the kernel. */ -unsigned int pcntxt_hmask, pcntxt_lmask; +u64 pcntxt_mask; struct _fpx_sw_bytes fx_sw_reserved; #ifdef CONFIG_IA32_EMULATION @@ -127,30 +128,28 @@ int save_i387_xstate(void __user *buf) int restore_user_xstate(void __user *buf) { struct _fpx_sw_bytes fx_sw_user; - unsigned int lmask, hmask; + u64 mask; int err; if (((unsigned long)buf % 64) || check_for_xstate(buf, buf, &fx_sw_user)) goto fx_only; - lmask = fx_sw_user.xstate_bv; - hmask = fx_sw_user.xstate_bv >> 32; + mask = fx_sw_user.xstate_bv; /* * restore the state passed by the user. */ - err = xrestore_user(buf, lmask, hmask); + err = xrestore_user(buf, mask); if (err) return err; /* * init the state skipped by the user. */ - lmask = pcntxt_lmask & ~lmask; - hmask = pcntxt_hmask & ~hmask; + mask = pcntxt_mask & ~mask; - xrstor_state(init_xstate_buf, lmask, hmask); + xrstor_state(init_xstate_buf, mask); return 0; @@ -160,8 +159,7 @@ fx_only: * memory layout. Restore just the FP/SSE and init all * the other extended state. */ - xrstor_state(init_xstate_buf, pcntxt_lmask & ~XSTATE_FPSSE, - pcntxt_hmask); + xrstor_state(init_xstate_buf, pcntxt_mask & ~XSTATE_FPSSE); return fxrstor_checking((__force struct i387_fxsave_struct *)buf); } @@ -231,8 +229,7 @@ void prepare_fx_sw_frame(void) fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1; fx_sw_reserved.extended_size = sig_xstate_size; - fx_sw_reserved.xstate_bv = pcntxt_lmask | - (((u64) (pcntxt_hmask)) << 32); + fx_sw_reserved.xstate_bv = pcntxt_mask; fx_sw_reserved.xstate_size = xstate_size; #ifdef CONFIG_IA32_EMULATION memcpy(&fx_sw_reserved_ia32, &fx_sw_reserved, @@ -263,11 +260,8 @@ void __cpuinit xsave_init(void) /* * Enable all the features that the HW is capable of * and the Linux kernel is aware of. - * - * xsetbv(); */ - asm volatile(".byte 0x0f,0x01,0xd1" : : "c" (0), - "a" (pcntxt_lmask), "d" (pcntxt_hmask)); + xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask); } /* @@ -287,36 +281,31 @@ void __init xsave_cntxt_init(void) unsigned int eax, ebx, ecx, edx; cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx); + pcntxt_mask = eax + ((u64)edx << 32); - pcntxt_lmask = eax; - pcntxt_hmask = edx; - - if ((pcntxt_lmask & XSTATE_FPSSE) != XSTATE_FPSSE) { - printk(KERN_ERR "FP/SSE not shown under xsave features %x\n", - pcntxt_lmask); + if ((pcntxt_mask & XSTATE_FPSSE) != XSTATE_FPSSE) { + printk(KERN_ERR "FP/SSE not shown under xsave features 0x%llx\n", + pcntxt_mask); BUG(); } /* * for now OS knows only about FP/SSE */ - pcntxt_lmask = pcntxt_lmask & XCNTXT_LMASK; - pcntxt_hmask = pcntxt_hmask & XCNTXT_HMASK; - + pcntxt_mask = pcntxt_mask & XCNTXT_MASK; xsave_init(); /* * Recompute the context size for enabled features */ cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx); - xstate_size = ebx; prepare_fx_sw_frame(); setup_xstate_init(); - printk(KERN_INFO "xsave/xrstor: enabled xstate_bv 0x%Lx, " + printk(KERN_INFO "xsave/xrstor: enabled xstate_bv 0x%llx, " "cntxt size 0x%x\n", - (pcntxt_lmask | ((u64) pcntxt_hmask << 32)), xstate_size); + pcntxt_mask, xstate_size); } diff --git a/include/asm-x86/xsave.h b/include/asm-x86/xsave.h index b7f64b9..08e9a1a 100644 --- a/include/asm-x86/xsave.h +++ b/include/asm-x86/xsave.h @@ -1,6 +1,7 @@ #ifndef __ASM_X86_XSAVE_H #define __ASM_X86_XSAVE_H +#include #include #include @@ -14,8 +15,7 @@ /* * These are the features that the OS can handle currently. */ -#define XCNTXT_LMASK (XSTATE_FP | XSTATE_SSE) -#define XCNTXT_HMASK 0x0 +#define XCNTXT_MASK (XSTATE_FP | XSTATE_SSE) #ifdef CONFIG_X86_64 #define REX_PREFIX "0x48, " @@ -23,7 +23,8 @@ #define REX_PREFIX #endif -extern unsigned int xstate_size, pcntxt_hmask, pcntxt_lmask; +extern unsigned int xstate_size; +extern u64 pcntxt_mask; extern struct xsave_struct *init_xstate_buf; extern void xsave_cntxt_init(void); @@ -73,12 +74,12 @@ static inline int xsave_user(struct xsave_struct __user *buf) return err; } -static inline int xrestore_user(struct xsave_struct __user *buf, - unsigned int lmask, - unsigned int hmask) +static inline int xrestore_user(struct xsave_struct __user *buf, u64 mask) { int err; struct xsave_struct *xstate = ((__force struct xsave_struct *)buf); + u32 lmask = mask; + u32 hmask = mask >> 32; __asm__ __volatile__("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n" "2:\n" @@ -96,8 +97,11 @@ static inline int xrestore_user(struct xsave_struct __user *buf, return err; } -static inline void xrstor_state(struct xsave_struct *fx, int lmask, int hmask) +static inline void xrstor_state(struct xsave_struct *fx, u64 mask) { + u32 lmask = mask; + u32 hmask = mask >> 32; + asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x2f\n\t" : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) : "memory"); -- cgit v0.10.2 From 0d1edf46ba229b46efacf75c0544b88c05a7b266 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 28 Jul 2008 11:53:57 -0700 Subject: xen: compile irq functions without -pg for ftrace For some reason I managed to miss a bunch of irq-related functions which also need to be compiled without -pg when using ftrace. This patch moves them into their own file, and starts a cleanup process I've been meaning to do anyway. Signed-off-by: Jeremy Fitzhardinge Cc: Sam Ravnborg Cc: "Alex Nixon (Intern)" Cc: Eduardo Habkost Signed-off-by: Ingo Molnar diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 5bfee24..9ee745f 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile @@ -2,9 +2,10 @@ ifdef CONFIG_FTRACE # Do not profile debug and lowlevel utilities CFLAGS_REMOVE_spinlock.o = -pg CFLAGS_REMOVE_time.o = -pg +CFLAGS_REMOVE_irq.o = -pg endif -obj-y := enlighten.o setup.o multicalls.o mmu.o \ +obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \ time.o xen-asm_$(BITS).o grant-table.o suspend.o obj-$(CONFIG_SMP) += smp.o spinlock.o diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index b795470..cf8b3a9 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -30,7 +30,6 @@ #include #include #include -#include #include #include #include @@ -226,94 +225,6 @@ static unsigned long xen_get_debugreg(int reg) return HYPERVISOR_get_debugreg(reg); } -static unsigned long xen_save_fl(void) -{ - struct vcpu_info *vcpu; - unsigned long flags; - - vcpu = x86_read_percpu(xen_vcpu); - - /* flag has opposite sense of mask */ - flags = !vcpu->evtchn_upcall_mask; - - /* convert to IF type flag - -0 -> 0x00000000 - -1 -> 0xffffffff - */ - return (-flags) & X86_EFLAGS_IF; -} - -static void xen_restore_fl(unsigned long flags) -{ - struct vcpu_info *vcpu; - - /* convert from IF type flag */ - flags = !(flags & X86_EFLAGS_IF); - - /* There's a one instruction preempt window here. We need to - make sure we're don't switch CPUs between getting the vcpu - pointer and updating the mask. */ - preempt_disable(); - vcpu = x86_read_percpu(xen_vcpu); - vcpu->evtchn_upcall_mask = flags; - preempt_enable_no_resched(); - - /* Doesn't matter if we get preempted here, because any - pending event will get dealt with anyway. */ - - if (flags == 0) { - preempt_check_resched(); - barrier(); /* unmask then check (avoid races) */ - if (unlikely(vcpu->evtchn_upcall_pending)) - force_evtchn_callback(); - } -} - -static void xen_irq_disable(void) -{ - /* There's a one instruction preempt window here. We need to - make sure we're don't switch CPUs between getting the vcpu - pointer and updating the mask. */ - preempt_disable(); - x86_read_percpu(xen_vcpu)->evtchn_upcall_mask = 1; - preempt_enable_no_resched(); -} - -static void xen_irq_enable(void) -{ - struct vcpu_info *vcpu; - - /* We don't need to worry about being preempted here, since - either a) interrupts are disabled, so no preemption, or b) - the caller is confused and is trying to re-enable interrupts - on an indeterminate processor. */ - - vcpu = x86_read_percpu(xen_vcpu); - vcpu->evtchn_upcall_mask = 0; - - /* Doesn't matter if we get preempted here, because any - pending event will get dealt with anyway. */ - - barrier(); /* unmask then check (avoid races) */ - if (unlikely(vcpu->evtchn_upcall_pending)) - force_evtchn_callback(); -} - -static void xen_safe_halt(void) -{ - /* Blocking includes an implicit local_irq_enable(). */ - if (HYPERVISOR_sched_op(SCHEDOP_block, NULL) != 0) - BUG(); -} - -static void xen_halt(void) -{ - if (irqs_disabled()) - HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL); - else - xen_safe_halt(); -} - static void xen_leave_lazy(void) { paravirt_leave_lazy(paravirt_get_lazy_mode()); @@ -1308,36 +1219,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { }, }; -static void __init __xen_init_IRQ(void) -{ -#ifdef CONFIG_X86_64 - int i; - - /* Create identity vector->irq map */ - for(i = 0; i < NR_VECTORS; i++) { - int cpu; - - for_each_possible_cpu(cpu) - per_cpu(vector_irq, cpu)[i] = i; - } -#endif /* CONFIG_X86_64 */ - - xen_init_IRQ(); -} - -static const struct pv_irq_ops xen_irq_ops __initdata = { - .init_IRQ = __xen_init_IRQ, - .save_fl = xen_save_fl, - .restore_fl = xen_restore_fl, - .irq_disable = xen_irq_disable, - .irq_enable = xen_irq_enable, - .safe_halt = xen_safe_halt, - .halt = xen_halt, -#ifdef CONFIG_X86_64 - .adjust_exception_frame = xen_adjust_exception_frame, -#endif -}; - static const struct pv_apic_ops xen_apic_ops __initdata = { #ifdef CONFIG_X86_LOCAL_APIC .apic_write = xen_apic_write, @@ -1740,10 +1621,11 @@ asmlinkage void __init xen_start_kernel(void) pv_init_ops = xen_init_ops; pv_time_ops = xen_time_ops; pv_cpu_ops = xen_cpu_ops; - pv_irq_ops = xen_irq_ops; pv_apic_ops = xen_apic_ops; pv_mmu_ops = xen_mmu_ops; + xen_init_irq_ops(); + if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) { pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start; pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit; diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c new file mode 100644 index 0000000..28b85ab --- /dev/null +++ b/arch/x86/xen/irq.c @@ -0,0 +1,143 @@ +#include + +#include +#include +#include + +#include +#include + +#include "xen-ops.h" + +/* + * Force a proper event-channel callback from Xen after clearing the + * callback mask. We do this in a very simple manner, by making a call + * down into Xen. The pending flag will be checked by Xen on return. + */ +void xen_force_evtchn_callback(void) +{ + (void)HYPERVISOR_xen_version(0, NULL); +} + +static void __init __xen_init_IRQ(void) +{ +#ifdef CONFIG_X86_64 + int i; + + /* Create identity vector->irq map */ + for(i = 0; i < NR_VECTORS; i++) { + int cpu; + + for_each_possible_cpu(cpu) + per_cpu(vector_irq, cpu)[i] = i; + } +#endif /* CONFIG_X86_64 */ + + xen_init_IRQ(); +} + +static unsigned long xen_save_fl(void) +{ + struct vcpu_info *vcpu; + unsigned long flags; + + vcpu = x86_read_percpu(xen_vcpu); + + /* flag has opposite sense of mask */ + flags = !vcpu->evtchn_upcall_mask; + + /* convert to IF type flag + -0 -> 0x00000000 + -1 -> 0xffffffff + */ + return (-flags) & X86_EFLAGS_IF; +} + +static void xen_restore_fl(unsigned long flags) +{ + struct vcpu_info *vcpu; + + /* convert from IF type flag */ + flags = !(flags & X86_EFLAGS_IF); + + /* There's a one instruction preempt window here. We need to + make sure we're don't switch CPUs between getting the vcpu + pointer and updating the mask. */ + preempt_disable(); + vcpu = x86_read_percpu(xen_vcpu); + vcpu->evtchn_upcall_mask = flags; + preempt_enable_no_resched(); + + /* Doesn't matter if we get preempted here, because any + pending event will get dealt with anyway. */ + + if (flags == 0) { + preempt_check_resched(); + barrier(); /* unmask then check (avoid races) */ + if (unlikely(vcpu->evtchn_upcall_pending)) + xen_force_evtchn_callback(); + } +} + +static void xen_irq_disable(void) +{ + /* There's a one instruction preempt window here. We need to + make sure we're don't switch CPUs between getting the vcpu + pointer and updating the mask. */ + preempt_disable(); + x86_read_percpu(xen_vcpu)->evtchn_upcall_mask = 1; + preempt_enable_no_resched(); +} + +static void xen_irq_enable(void) +{ + struct vcpu_info *vcpu; + + /* We don't need to worry about being preempted here, since + either a) interrupts are disabled, so no preemption, or b) + the caller is confused and is trying to re-enable interrupts + on an indeterminate processor. */ + + vcpu = x86_read_percpu(xen_vcpu); + vcpu->evtchn_upcall_mask = 0; + + /* Doesn't matter if we get preempted here, because any + pending event will get dealt with anyway. */ + + barrier(); /* unmask then check (avoid races) */ + if (unlikely(vcpu->evtchn_upcall_pending)) + xen_force_evtchn_callback(); +} + +static void xen_safe_halt(void) +{ + /* Blocking includes an implicit local_irq_enable(). */ + if (HYPERVISOR_sched_op(SCHEDOP_block, NULL) != 0) + BUG(); +} + +static void xen_halt(void) +{ + if (irqs_disabled()) + HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL); + else + xen_safe_halt(); +} + +static const struct pv_irq_ops xen_irq_ops __initdata = { + .init_IRQ = __xen_init_IRQ, + .save_fl = xen_save_fl, + .restore_fl = xen_restore_fl, + .irq_disable = xen_irq_disable, + .irq_enable = xen_irq_enable, + .safe_halt = xen_safe_halt, + .halt = xen_halt, +#ifdef CONFIG_X86_64 + .adjust_exception_frame = xen_adjust_exception_frame, +#endif +}; + +void __init xen_init_irq_ops() +{ + pv_irq_ops = xen_irq_ops; +} diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S index 2497a30..42786f5 100644 --- a/arch/x86/xen/xen-asm_32.S +++ b/arch/x86/xen/xen-asm_32.S @@ -298,7 +298,7 @@ check_events: push %eax push %ecx push %edx - call force_evtchn_callback + call xen_force_evtchn_callback pop %edx pop %ecx pop %eax diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 7f58304..3b9bda4 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -122,7 +122,7 @@ check_events: push %r9 push %r10 push %r11 - call force_evtchn_callback + call xen_force_evtchn_callback pop %r11 pop %r10 pop %r9 diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 8847fb3..3c70ebc 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -31,6 +31,7 @@ void xen_vcpu_restore(void); void __init xen_build_dynamic_phys_to_machine(void); +void xen_init_irq_ops(void); void xen_setup_timer(int cpu); void xen_setup_cpu_clockevents(void); unsigned long xen_tsc_khz(void); diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 0e0c285..a083703 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -84,17 +84,6 @@ static int irq_bindcount[NR_IRQS]; /* Xen will never allocate port zero for any purpose. */ #define VALID_EVTCHN(chn) ((chn) != 0) -/* - * Force a proper event-channel callback from Xen after clearing the - * callback mask. We do this in a very simple manner, by making a call - * down into Xen. The pending flag will be checked by Xen on return. - */ -void force_evtchn_callback(void) -{ - (void)HYPERVISOR_xen_version(0, NULL); -} -EXPORT_SYMBOL_GPL(force_evtchn_callback); - static struct irq_chip xen_dynamic_chip; /* Constructor for packed IRQ information. */ -- cgit v0.10.2 From cef43bf6b3afd819f7cdcba356af0e8220fb3789 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 28 Jul 2008 13:33:44 -0700 Subject: xen: fix allocation and use of large ldts, cleanup Add a proper comment for set_aliased_prot() and fix an unsigned long/void * warning. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index cf8b3a9..04ec69e 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -237,8 +237,10 @@ static unsigned long xen_store_tr(void) } /* - * If 'v' is a vmalloc mapping, then find the linear mapping of the - * page (if any) and also set its protections to match: + * Set the page permissions for a particular virtual address. If the + * address is a vmalloc mapping (or other non-linear mapping), then + * find the linear mapping of the page and also set its protections to + * match. */ static void set_aliased_prot(void *v, pgprot_t prot) { @@ -387,8 +389,7 @@ static void xen_load_gs_index(unsigned int idx) static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum, const void *ptr) { - unsigned long lp = (unsigned long)&dt[entrynum]; - xmaddr_t mach_lp = arbitrary_virt_to_machine(lp); + xmaddr_t mach_lp = arbitrary_virt_to_machine(&dt[entrynum]); u64 entry = *(u64 *)ptr; preempt_disable(); -- cgit v0.10.2 From 169ad16bb87c10a3f7c108bb7008ebc0270f617a Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Mon, 28 Jul 2008 18:32:09 -0300 Subject: xen_alloc_ptpage: cast PFN_PHYS() argument to unsigned long Currently paravirt_ops alloc_p*() uses u32 for the pfn args. We should change that later, but while the pfn parameter is still u32, we need to cast the PFN_PHYS() argument at xen_alloc_ptpage() to unsigned long, otherwise it will lose bits on the shift. I think PFN_PHYS() should behave better when fed with smaller integers, but a cast to unsigned long won't be enough for all cases on 32-bit PAE, and a cast to u64 would be overkill for most users of PFN_PHYS(). We could have two different flavors of PFN_PHYS: one for low pages only (unsigned long) and another that works for any page (u64)), but while we don't have it, we will need the cast to unsigned long on xen_alloc_ptpage(). Signed-off-by: Eduardo Habkost Acked-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 04ec69e..53afa14 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -822,7 +822,7 @@ static void xen_alloc_ptpage(struct mm_struct *mm, u32 pfn, unsigned level) SetPagePinned(page); if (!PageHighMem(page)) { - make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); + make_lowmem_page_readonly(__va(PFN_PHYS((unsigned long)pfn))); if (level == PT_PTE) pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); } else -- cgit v0.10.2 From 7de08b4e1ed8d80e6086f71b7e99fc4b397aae39 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Tue, 29 Jul 2008 02:48:51 -0300 Subject: x86: coding styles fixes to arch/x86/kernel/process_64.c Fix about 50 errors and many warnings without change process_64.o arch/x86/kernel/process_64.o: text data bss dec hex filename 5236 8 24 5268 1494 process_64.o.after 5236 8 24 5268 1494 process_64.o.before md5: 9c35e9debdea4e471288c6e8ca267a75 process_64.o.after 9c35e9debdea4e471288c6e8ca267a75 process_64.o.before Signed-off-by: Gustavo F. Padovan Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 3fb62a7..4da8514 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -37,11 +37,11 @@ #include #include #include +#include +#include -#include #include #include -#include #include #include #include @@ -88,7 +88,7 @@ void exit_idle(void) #ifdef CONFIG_HOTPLUG_CPU DECLARE_PER_CPU(int, cpu_state); -#include +#include /* We halt the CPU with physical CPU hotplug */ static inline void play_dead(void) { @@ -152,7 +152,7 @@ void cpu_idle(void) } /* Prints also some state that isn't saved in the pt_regs */ -void __show_regs(struct pt_regs * regs) +void __show_regs(struct pt_regs *regs) { unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs; unsigned long d0, d1, d2, d3, d6, d7; @@ -177,28 +177,28 @@ void __show_regs(struct pt_regs * regs) printk("RBP: %016lx R08: %016lx R09: %016lx\n", regs->bp, regs->r8, regs->r9); printk("R10: %016lx R11: %016lx R12: %016lx\n", - regs->r10, regs->r11, regs->r12); + regs->r10, regs->r11, regs->r12); printk("R13: %016lx R14: %016lx R15: %016lx\n", - regs->r13, regs->r14, regs->r15); + regs->r13, regs->r14, regs->r15); - asm("movl %%ds,%0" : "=r" (ds)); - asm("movl %%cs,%0" : "=r" (cs)); - asm("movl %%es,%0" : "=r" (es)); + asm("movl %%ds,%0" : "=r" (ds)); + asm("movl %%cs,%0" : "=r" (cs)); + asm("movl %%es,%0" : "=r" (es)); asm("movl %%fs,%0" : "=r" (fsindex)); asm("movl %%gs,%0" : "=r" (gsindex)); rdmsrl(MSR_FS_BASE, fs); - rdmsrl(MSR_GS_BASE, gs); - rdmsrl(MSR_KERNEL_GS_BASE, shadowgs); + rdmsrl(MSR_GS_BASE, gs); + rdmsrl(MSR_KERNEL_GS_BASE, shadowgs); cr0 = read_cr0(); cr2 = read_cr2(); cr3 = read_cr3(); cr4 = read_cr4(); - printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", - fs,fsindex,gs,gsindex,shadowgs); - printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0); + printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", + fs, fsindex, gs, gsindex, shadowgs); + printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0); printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4); get_debugreg(d0, 0); @@ -314,10 +314,10 @@ void prepare_to_copy(struct task_struct *tsk) int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, unsigned long unused, - struct task_struct * p, struct pt_regs * regs) + struct task_struct *p, struct pt_regs *regs) { int err; - struct pt_regs * childregs; + struct pt_regs *childregs; struct task_struct *me = current; childregs = ((struct pt_regs *) @@ -362,10 +362,10 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, if (test_thread_flag(TIF_IA32)) err = do_set_thread_area(p, -1, (struct user_desc __user *)childregs->si, 0); - else -#endif - err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8); - if (err) + else +#endif + err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8); + if (err) goto out; } err = 0; @@ -544,7 +544,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) unsigned fsindex, gsindex; /* we're going to use this soon, after a few expensive things */ - if (next_p->fpu_counter>5) + if (next_p->fpu_counter > 5) prefetch(next->xstate); /* @@ -552,13 +552,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) */ load_sp0(tss, next); - /* + /* * Switch DS and ES. * This won't pick up thread selector changes, but I guess that is ok. */ savesegment(es, prev->es); if (unlikely(next->es | prev->es)) - loadsegment(es, next->es); + loadsegment(es, next->es); savesegment(ds, prev->ds); if (unlikely(next->ds | prev->ds)) @@ -584,7 +584,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) */ arch_leave_lazy_cpu_mode(); - /* + /* * Switch FS and GS. * * Segment register != 0 always requires a reload. Also @@ -593,13 +593,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) */ if (unlikely(fsindex | next->fsindex | prev->fs)) { loadsegment(fs, next->fsindex); - /* + /* * Check if the user used a selector != 0; if yes * clear 64bit base, since overloaded base is always * mapped to the Null selector */ if (fsindex) - prev->fs = 0; + prev->fs = 0; } /* when next process has a 64bit base use it */ if (next->fs) @@ -609,7 +609,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) if (unlikely(gsindex | next->gsindex | prev->gs)) { load_gs_index(next->gsindex); if (gsindex) - prev->gs = 0; + prev->gs = 0; } if (next->gs) wrmsrl(MSR_KERNEL_GS_BASE, next->gs); @@ -618,12 +618,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* Must be after DS reload */ unlazy_fpu(prev_p); - /* + /* * Switch the PDA and FPU contexts. */ prev->usersp = read_pda(oldrsp); write_pda(oldrsp, next->usersp); - write_pda(pcurrent, next_p); + write_pda(pcurrent, next_p); write_pda(kernelstack, (unsigned long)task_stack_page(next_p) + @@ -664,7 +664,7 @@ long sys_execve(char __user *name, char __user * __user *argv, char __user * __user *envp, struct pt_regs *regs) { long error; - char * filename; + char *filename; filename = getname(name); error = PTR_ERR(filename); @@ -722,55 +722,55 @@ asmlinkage long sys_vfork(struct pt_regs *regs) unsigned long get_wchan(struct task_struct *p) { unsigned long stack; - u64 fp,ip; + u64 fp, ip; int count = 0; - if (!p || p == current || p->state==TASK_RUNNING) - return 0; + if (!p || p == current || p->state == TASK_RUNNING) + return 0; stack = (unsigned long)task_stack_page(p); if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE) return 0; fp = *(u64 *)(p->thread.sp); - do { + do { if (fp < (unsigned long)stack || fp > (unsigned long)stack+THREAD_SIZE) - return 0; + return 0; ip = *(u64 *)(fp+8); if (!in_sched_functions(ip)) return ip; - fp = *(u64 *)fp; - } while (count++ < 16); + fp = *(u64 *)fp; + } while (count++ < 16); return 0; } long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) -{ - int ret = 0; +{ + int ret = 0; int doit = task == current; int cpu; - switch (code) { + switch (code) { case ARCH_SET_GS: if (addr >= TASK_SIZE_OF(task)) - return -EPERM; + return -EPERM; cpu = get_cpu(); - /* handle small bases via the GDT because that's faster to + /* handle small bases via the GDT because that's faster to switch. */ - if (addr <= 0xffffffff) { - set_32bit_tls(task, GS_TLS, addr); - if (doit) { + if (addr <= 0xffffffff) { + set_32bit_tls(task, GS_TLS, addr); + if (doit) { load_TLS(&task->thread, cpu); - load_gs_index(GS_TLS_SEL); + load_gs_index(GS_TLS_SEL); } - task->thread.gsindex = GS_TLS_SEL; + task->thread.gsindex = GS_TLS_SEL; task->thread.gs = 0; - } else { + } else { task->thread.gsindex = 0; task->thread.gs = addr; if (doit) { load_gs_index(0); ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr); - } + } } put_cpu(); break; @@ -824,8 +824,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) rdmsrl(MSR_KERNEL_GS_BASE, base); else base = task->thread.gs; - } - else + } else base = task->thread.gs; ret = put_user(base, (unsigned long __user *)addr); break; -- cgit v0.10.2 From 8092c654de9a964c14d89da56834f73a80548a58 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Tue, 29 Jul 2008 02:48:52 -0300 Subject: x86: add KERN_INFO to printks on process_64.c Fix many coding style warnings. Signed-off-by: Gustavo F. Padovan Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 4da8514..3560d7f 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -161,24 +161,24 @@ void __show_regs(struct pt_regs *regs) printk("\n"); print_modules(); - printk("Pid: %d, comm: %.20s %s %s %.*s\n", + printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s\n", current->pid, current->comm, print_tainted(), init_utsname()->release, (int)strcspn(init_utsname()->version, " "), init_utsname()->version); - printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip); + printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip); printk_address(regs->ip, 1); - printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->sp, - regs->flags); - printk("RAX: %016lx RBX: %016lx RCX: %016lx\n", + printk(KERN_INFO "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, + regs->sp, regs->flags); + printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n", regs->ax, regs->bx, regs->cx); - printk("RDX: %016lx RSI: %016lx RDI: %016lx\n", + printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n", regs->dx, regs->si, regs->di); - printk("RBP: %016lx R08: %016lx R09: %016lx\n", + printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n", regs->bp, regs->r8, regs->r9); - printk("R10: %016lx R11: %016lx R12: %016lx\n", + printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n", regs->r10, regs->r11, regs->r12); - printk("R13: %016lx R14: %016lx R15: %016lx\n", + printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n", regs->r13, regs->r14, regs->r15); asm("movl %%ds,%0" : "=r" (ds)); @@ -196,24 +196,26 @@ void __show_regs(struct pt_regs *regs) cr3 = read_cr3(); cr4 = read_cr4(); - printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", + printk(KERN_INFO "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", fs, fsindex, gs, gsindex, shadowgs); - printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0); - printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4); + printk(KERN_INFO "CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, + es, cr0); + printk(KERN_INFO "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, + cr4); get_debugreg(d0, 0); get_debugreg(d1, 1); get_debugreg(d2, 2); - printk("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2); + printk(KERN_INFO "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2); get_debugreg(d3, 3); get_debugreg(d6, 6); get_debugreg(d7, 7); - printk("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7); + printk(KERN_INFO "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7); } void show_regs(struct pt_regs *regs) { - printk("CPU %d:", smp_processor_id()); + printk(KERN_INFO "CPU %d:", smp_processor_id()); __show_regs(regs); show_trace(NULL, regs, (void *)(regs + 1), regs->bp); } -- cgit v0.10.2 From 08aadf069d0482ade033badefa8f03eb2fcddd9c Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Tue, 29 Jul 2008 02:48:53 -0300 Subject: x86: coding style fixes to arch/x86/kernel/crash_dump_64.c Fix conding style without change crash_dump_64.o arch/x86/kernel/crash_dump_64.o text data bss dec hex filename 129 0 0 129 81 crash_dump_64.o.after 129 0 0 129 81 crash_dump_64.o.before md5: 885b52c1b92737e6b12e5107e90fc1f1 crash_dump_64.o.after 885b52c1b92737e6b12e5107e90fc1f1 crash_dump_64.o.before Signed-off-by: Gustavo F. Padovan Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/crash_dump_64.c b/arch/x86/kernel/crash_dump_64.c index 15e6c6b..d3e524c 100644 --- a/arch/x86/kernel/crash_dump_64.c +++ b/arch/x86/kernel/crash_dump_64.c @@ -7,9 +7,8 @@ #include #include - -#include -#include +#include +#include /** * copy_oldmem_page - copy one page from "oldmem" @@ -25,7 +24,7 @@ * in the current kernel. We stitch up a pte, similar to kmap_atomic. */ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, - size_t csize, unsigned long offset, int userbuf) + size_t csize, unsigned long offset, int userbuf) { void *vaddr; -- cgit v0.10.2 From caa007dd3687d38a0252484d9d0a8f9d929ba932 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Tue, 29 Jul 2008 02:48:54 -0300 Subject: x86: coding style fixes to arch/x86/kernel/signal_64.c Fix all errors and many warnings reported by checkpatch.pl without change signal_64.o arch/x86/kernel/signal_64.o text data bss dec hex filename 5143 0 8 5151 141f signal_64.o.after 5143 0 8 5151 141f signal_64.o.before md5: e68718092b3641cb27e79e55ce57e3ad signal_64.o.after e68718092b3641cb27e79e55ce57e3ad signal_64.o.before Signed-off-by: Gustavo F. Padovan Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/sigframe.h b/arch/x86/kernel/sigframe.h index 72bbb51..8b4956e 100644 --- a/arch/x86/kernel/sigframe.h +++ b/arch/x86/kernel/sigframe.h @@ -24,4 +24,9 @@ struct rt_sigframe { struct ucontext uc; struct siginfo info; }; + +int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, + sigset_t *set, struct pt_regs *regs); +int ia32_setup_frame(int sig, struct k_sigaction *ka, + sigset_t *set, struct pt_regs *regs); #endif diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index b45ef8d..87a9c2f 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -19,9 +19,10 @@ #include #include #include +#include + #include #include -#include #include #include #include @@ -41,11 +42,6 @@ # define FIX_EFLAGS __FIX_EFLAGS #endif -int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, - sigset_t *set, struct pt_regs * regs); -int ia32_setup_frame(int sig, struct k_sigaction *ka, - sigset_t *set, struct pt_regs * regs); - asmlinkage long sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, struct pt_regs *regs) @@ -119,7 +115,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, /* Always make any pending restarted system calls return -EINTR */ current_thread_info()->restart_block.fn = do_no_restart_syscall; -#define COPY(x) err |= __get_user(regs->x, &sc->x) +#define COPY(x) (err |= __get_user(regs->x, &sc->x)) COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); COPY(dx); COPY(cx); COPY(ip); @@ -149,7 +145,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, } { - struct _fpstate __user * buf; + struct _fpstate __user *buf; err |= __get_user(buf, &sc->fpstate); if (buf) { @@ -189,7 +185,7 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) current->blocked = set; recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); - + if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) goto badframe; @@ -199,16 +195,17 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) return ax; badframe: - signal_fault(regs,frame,"sigreturn"); + signal_fault(regs, frame, "sigreturn"); return 0; -} +} /* * Set up a signal frame. */ static inline int -setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, unsigned long mask, struct task_struct *me) +setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, + unsigned long mask, struct task_struct *me) { int err = 0; @@ -264,35 +261,35 @@ get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size) } static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, - sigset_t *set, struct pt_regs * regs) + sigset_t *set, struct pt_regs *regs) { struct rt_sigframe __user *frame; - struct _fpstate __user *fp = NULL; + struct _fpstate __user *fp = NULL; int err = 0; struct task_struct *me = current; if (used_math()) { - fp = get_stack(ka, regs, sizeof(struct _fpstate)); + fp = get_stack(ka, regs, sizeof(struct _fpstate)); frame = (void __user *)round_down( (unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8; if (!access_ok(VERIFY_WRITE, fp, sizeof(struct _fpstate))) goto give_sigsegv; - if (save_i387(fp) < 0) - err |= -1; + if (save_i387(fp) < 0) + err |= -1; } else frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8; if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) goto give_sigsegv; - if (ka->sa.sa_flags & SA_SIGINFO) { + if (ka->sa.sa_flags & SA_SIGINFO) { err |= copy_siginfo_to_user(&frame->info, info); if (err) goto give_sigsegv; } - + /* Create the ucontext. */ err |= __put_user(0, &frame->uc.uc_flags); err |= __put_user(0, &frame->uc.uc_link); @@ -302,9 +299,9 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size); err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, set->sig[0], me); err |= __put_user(fp, &frame->uc.uc_mcontext.fpstate); - if (sizeof(*set) == 16) { + if (sizeof(*set) == 16) { __put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]); - __put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]); + __put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]); } else err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); @@ -315,7 +312,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, err |= __put_user(ka->sa.sa_restorer, &frame->pretcode); } else { /* could use a vstub here */ - goto give_sigsegv; + goto give_sigsegv; } if (err) @@ -323,7 +320,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, /* Set up registers for signal handler */ regs->di = sig; - /* In case the signal handler was declared without prototypes */ + /* In case the signal handler was declared without prototypes */ regs->ax = 0; /* This also works for non SA_SIGINFO handlers because they expect the @@ -376,7 +373,7 @@ static long current_syscall_ret(struct pt_regs *regs) /* * OK, we're invoking a handler - */ + */ static int handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, @@ -420,7 +417,7 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, ret = ia32_setup_rt_frame(sig, ka, info, oldset, regs); else ret = ia32_setup_frame(sig, ka, oldset, regs); - } else + } else #endif ret = setup_rt_frame(sig, ka, info, oldset, regs); @@ -448,9 +445,9 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, ptrace_notify(SIGTRAP); spin_lock_irq(¤t->sighand->siglock); - sigorsets(¤t->blocked,¤t->blocked,&ka->sa.sa_mask); + sigorsets(¤t->blocked, ¤t->blocked, &ka->sa.sa_mask); if (!(ka->sa.sa_flags & SA_NODEFER)) - sigaddset(¤t->blocked,sig); + sigaddset(¤t->blocked, sig); recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); } @@ -552,14 +549,15 @@ void do_notify_resume(struct pt_regs *regs, void *unused, } void signal_fault(struct pt_regs *regs, void __user *frame, char *where) -{ - struct task_struct *me = current; +{ + struct task_struct *me = current; if (show_unhandled_signals && printk_ratelimit()) { printk("%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx", - me->comm,me->pid,where,frame,regs->ip,regs->sp,regs->orig_ax); + me->comm, me->pid, where, frame, regs->ip, + regs->sp, regs->orig_ax); print_vma_addr(" in ", regs->ip); printk("\n"); } - force_sig(SIGSEGV, me); -} + force_sig(SIGSEGV, me); +} -- cgit v0.10.2 From 4df9e510a9fda29aca71d8acac853b98aa6884d1 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Tue, 29 Jul 2008 02:48:55 -0300 Subject: x86: coding style fixes to arch/x86/kernel/traps_64.c Fix all errors and many warnings reported by checkpath.pl. Except the change of include to the traps.o before and after changes are the same. Signed-off-by: Gustavo F. Padovan Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index 3f18d73..fe36d96 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -32,6 +32,8 @@ #include #include #include +#include +#include #if defined(CONFIG_EDAC) #include @@ -45,9 +47,6 @@ #include #include #include -#include -#include -#include #include #include #include @@ -85,7 +84,8 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) void printk_address(unsigned long address, int reliable) { - printk(" [<%016lx>] %s%pS\n", address, reliable ? "": "? ", (void *) address); + printk(" [<%016lx>] %s%pS\n", address, reliable ? + "" : "? ", (void *) address); } static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, @@ -98,7 +98,8 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, [STACKFAULT_STACK - 1] = "#SS", [MCE_STACK - 1] = "#MC", #if DEBUG_STKSZ > EXCEPTION_STKSZ - [N_EXCEPTION_STACKS ... N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]" + [N_EXCEPTION_STACKS ... N_EXCEPTION_STACKS + DEBUG_STKSZ / + EXCEPTION_STKSZ - 2] = "#DB[?]" #endif }; unsigned k; @@ -163,7 +164,7 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, } /* - * x86-64 can have up to three kernel stacks: + * x86-64 can have up to three kernel stacks: * process stack * interrupt stack * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack @@ -219,7 +220,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, const struct stacktrace_ops *ops, void *data) { const unsigned cpu = get_cpu(); - unsigned long *irqstack_end = (unsigned long*)cpu_pda(cpu)->irqstackptr; + unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; unsigned used = 0; struct thread_info *tinfo; @@ -237,7 +238,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, if (!bp) { if (task == current) { /* Grab bp right from our regs */ - asm("movq %%rbp, %0" : "=r" (bp) :); + asm("movq %%rbp, %0" : "=r" (bp) : ); } else { /* bp is the last reg pushed by switch_to */ bp = *(unsigned long *) task->thread.sp; @@ -357,11 +358,13 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, unsigned long *stack; int i; const int cpu = smp_processor_id(); - unsigned long *irqstack_end = (unsigned long *) (cpu_pda(cpu)->irqstackptr); - unsigned long *irqstack = (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE); + unsigned long *irqstack_end = + (unsigned long *) (cpu_pda(cpu)->irqstackptr); + unsigned long *irqstack = + (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE); - // debugging aid: "show_stack(NULL, NULL);" prints the - // back trace for this cpu. + /* debugging aid: "show_stack(NULL, NULL);" prints the + back trace for this cpu. */ if (sp == NULL) { if (task) @@ -404,7 +407,7 @@ void dump_stack(void) #ifdef CONFIG_FRAME_POINTER if (!bp) - asm("movq %%rbp, %0" : "=r" (bp):); + asm("movq %%rbp, %0" : "=r" (bp) : ); #endif printk("Pid: %d, comm: %.20s %s %s %.*s\n", @@ -414,7 +417,6 @@ void dump_stack(void) init_utsname()->version); show_trace(NULL, NULL, &stack, bp); } - EXPORT_SYMBOL(dump_stack); void show_registers(struct pt_regs *regs) @@ -493,7 +495,7 @@ unsigned __kprobes long oops_begin(void) raw_local_irq_save(flags); cpu = smp_processor_id(); if (!__raw_spin_trylock(&die_lock)) { - if (cpu == die_owner) + if (cpu == die_owner) /* nested oops. should stop eventually */; else __raw_spin_lock(&die_lock); @@ -638,7 +640,7 @@ kernel_trap: } #define DO_ERROR(trapnr, signr, str, name) \ -asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ +asmlinkage void do_##name(struct pt_regs *regs, long error_code) \ { \ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ == NOTIFY_STOP) \ @@ -648,7 +650,7 @@ asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ } #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ -asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ +asmlinkage void do_##name(struct pt_regs *regs, long error_code) \ { \ siginfo_t info; \ info.si_signo = signr; \ @@ -683,7 +685,7 @@ asmlinkage void do_stack_segment(struct pt_regs *regs, long error_code) preempt_conditional_cli(regs); } -asmlinkage void do_double_fault(struct pt_regs * regs, long error_code) +asmlinkage void do_double_fault(struct pt_regs *regs, long error_code) { static const char str[] = "double fault"; struct task_struct *tsk = current; @@ -778,9 +780,10 @@ io_check_error(unsigned char reason, struct pt_regs *regs) } static notrace __kprobes void -unknown_nmi_error(unsigned char reason, struct pt_regs * regs) +unknown_nmi_error(unsigned char reason, struct pt_regs *regs) { - if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) + if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == + NOTIFY_STOP) return; printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n", reason); @@ -882,7 +885,7 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) else if (user_mode(eregs)) regs = task_pt_regs(current); /* Exception from kernel and interrupts are enabled. Move to - kernel process stack. */ + kernel process stack. */ else if (eregs->flags & X86_EFLAGS_IF) regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs)); if (eregs != regs) @@ -891,7 +894,7 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) } /* runs on IST stack. */ -asmlinkage void __kprobes do_debug(struct pt_regs * regs, +asmlinkage void __kprobes do_debug(struct pt_regs *regs, unsigned long error_code) { struct task_struct *tsk = current; @@ -1035,7 +1038,7 @@ asmlinkage void do_coprocessor_error(struct pt_regs *regs) asmlinkage void bad_intr(void) { - printk("bad interrupt"); + printk("bad interrupt"); } asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs) @@ -1047,7 +1050,7 @@ asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs) conditional_sti(regs); if (!user_mode(regs) && - kernel_math_error(regs, "kernel simd math error", 19)) + kernel_math_error(regs, "kernel simd math error", 19)) return; /* @@ -1092,7 +1095,7 @@ asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs) force_sig_info(SIGFPE, &info, task); } -asmlinkage void do_spurious_interrupt_bug(struct pt_regs * regs) +asmlinkage void do_spurious_interrupt_bug(struct pt_regs *regs) { } @@ -1142,8 +1145,10 @@ void __init trap_init(void) set_intr_gate(0, ÷_error); set_intr_gate_ist(1, &debug, DEBUG_STACK); set_intr_gate_ist(2, &nmi, NMI_STACK); - set_system_gate_ist(3, &int3, DEBUG_STACK); /* int3 can be called from all */ - set_system_gate(4, &overflow); /* int4 can be called from all */ + /* int3 can be called from all */ + set_system_gate_ist(3, &int3, DEBUG_STACK); + /* int4 can be called from all */ + set_system_gate(4, &overflow); set_intr_gate(5, &bounds); set_intr_gate(6, &invalid_op); set_intr_gate(7, &device_not_available); -- cgit v0.10.2 From e9c8abb66cc37801bdb5d4360bb78d180c3bbb73 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Tue, 29 Jul 2008 02:48:56 -0300 Subject: x86: coding style fixes to arch/x86/kernel/sys_x86_64.c Fix all errors and many warnings reported by checkpatch.pl without change sys_x86_64.o arch/x86/kernel/sys_x86_64.o: text data bss dec hex filename 1567 0 0 1567 61f sys_x86_64.o.after 1567 0 0 1567 61f sys_x86_64.o.before md5: de28ffedcb5851dfd7ec87a03afec1fd sys_x86_64.o.after de28ffedcb5851dfd7ec87a03afec1fd sys_x86_64.o.before Signed-off-by: Gustavo F. Padovan Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index 3b360ef..56eb8f9 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -13,15 +13,16 @@ #include #include #include +#include -#include #include -asmlinkage long sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long off) +asmlinkage long sys_mmap(unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long off) { long error; - struct file * file; + struct file *file; error = -EINVAL; if (off & ~PAGE_MASK) @@ -56,9 +57,9 @@ static void find_start_end(unsigned long flags, unsigned long *begin, unmapped base down for this case. This can give conflicts with the heap, but we assume that glibc malloc knows how to fall back to mmap. Give it 1GB - of playground for now. -AK */ - *begin = 0x40000000; - *end = 0x80000000; + of playground for now. -AK */ + *begin = 0x40000000; + *end = 0x80000000; if (current->flags & PF_RANDOMIZE) { new_begin = randomize_range(*begin, *begin + 0x02000000, 0); if (new_begin) @@ -66,9 +67,9 @@ static void find_start_end(unsigned long flags, unsigned long *begin, } } else { *begin = TASK_UNMAPPED_BASE; - *end = TASK_SIZE; + *end = TASK_SIZE; } -} +} unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, @@ -78,11 +79,11 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, struct vm_area_struct *vma; unsigned long start_addr; unsigned long begin, end; - + if (flags & MAP_FIXED) return addr; - find_start_end(flags, &begin, &end); + find_start_end(flags, &begin, &end); if (len > end) return -ENOMEM; @@ -96,12 +97,12 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, } if (((flags & MAP_32BIT) || test_thread_flag(TIF_IA32)) && len <= mm->cached_hole_size) { - mm->cached_hole_size = 0; + mm->cached_hole_size = 0; mm->free_area_cache = begin; } addr = mm->free_area_cache; - if (addr < begin) - addr = begin; + if (addr < begin) + addr = begin; start_addr = addr; full_search: @@ -127,7 +128,7 @@ full_search: return addr; } if (addr + mm->cached_hole_size < vma->vm_start) - mm->cached_hole_size = vma->vm_start - addr; + mm->cached_hole_size = vma->vm_start - addr; addr = vma->vm_end; } @@ -177,7 +178,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, vma = find_vma(mm, addr-len); if (!vma || addr <= vma->vm_start) /* remember the address as a hint for next time */ - return (mm->free_area_cache = addr-len); + return mm->free_area_cache = addr-len; } if (mm->mmap_base < len) @@ -194,7 +195,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, vma = find_vma(mm, addr); if (!vma || addr+len <= vma->vm_start) /* remember the address as a hint for next time */ - return (mm->free_area_cache = addr); + return mm->free_area_cache = addr; /* remember the largest hole we saw so far */ if (addr + mm->cached_hole_size < vma->vm_start) @@ -224,13 +225,13 @@ bottomup: } -asmlinkage long sys_uname(struct new_utsname __user * name) +asmlinkage long sys_uname(struct new_utsname __user *name) { int err; down_read(&uts_sem); - err = copy_to_user(name, utsname(), sizeof (*name)); + err = copy_to_user(name, utsname(), sizeof(*name)); up_read(&uts_sem); - if (personality(current->personality) == PER_LINUX32) - err |= copy_to_user(&name->machine, "i686", 5); + if (personality(current->personality) == PER_LINUX32) + err |= copy_to_user(&name->machine, "i686", 5); return err ? -EFAULT : 0; } -- cgit v0.10.2 From 64a76f667d987a559ad0726b4692c987800b22bc Mon Sep 17 00:00:00 2001 From: David Brownell Date: Tue, 29 Jul 2008 12:47:38 -0700 Subject: hpet: /dev/hpet - fixes and cleanup Minor /dev/hpet updates and bugfixes: * Remove dead code, mostly remnants of an incomplete/unusable kernel interface ... noted when addressing "sparse" warnings: + hpet_unregister() and a routine it calls + hpet_task and all references, including hpet_task_lock + hpet_data.hd_flags (and HPET_DATA_PLATFORM) * Correct and improve boot message: + displays *counter* (shared between comparators) bit width, not *timer* bit widths (which are often mixed) + relabel "timers" as "comparators"; this is less confusing, they are not independent like normal timers are (sigh) + display MHz not Hz; it's never less than 10 MHz. * Tighten and correct the userspace interface code + don't accidentally program comparators in 64-bit mode using 32-bit values ... always force comparators into 32-bit mode + provide the correct bit definition flagging comparators with periodic capability ... the ABI is unchanged * Update Documentation/hpet.txt + be more correct and current + expand description a bit + don't mention that now-gone kernel interface Plus, add a FIXME comment for something that could cause big trouble on systems with more capable HPETs than at least Intel seems to ship. It seems that few folk use this userspace interface; it's not very usable given the general lack of HPET IRQ routing. I'm told that the only real point of it any more is to mmap for fast timestamps; IMO that's handled better through the gettimeofday() vsyscall. Signed-off-by: David Brownell Acked-by: Clemens Ladisch Signed-off-by: Ingo Molnar diff --git a/Documentation/timers/hpet.txt b/Documentation/timers/hpet.txt index 6ad52d9..e7c09ab 100644 --- a/Documentation/timers/hpet.txt +++ b/Documentation/timers/hpet.txt @@ -1,21 +1,32 @@ High Precision Event Timer Driver for Linux -The High Precision Event Timer (HPET) hardware is the future replacement -for the 8254 and Real Time Clock (RTC) periodic timer functionality. -Each HPET can have up to 32 timers. It is possible to configure the -first two timers as legacy replacements for 8254 and RTC periodic timers. -A specification done by Intel and Microsoft can be found at -. +The High Precision Event Timer (HPET) hardware follows a specification +by Intel and Microsoft which can be found at + + http://www.intel.com/technology/architecture/hpetspec.htm + +Each HPET has one fixed-rate counter (at 10+ MHz, hence "High Precision") +and up to 32 comparators. Normally three or more comparators are provided, +each of which can generate oneshot interupts and at least one of which has +additional hardware to support periodic interrupts. The comparators are +also called "timers", which can be misleading since usually timers are +independent of each other ... these share a counter, complicating resets. + +HPET devices can support two interrupt routing modes. In one mode, the +comparators are additional interrupt sources with no particular system +role. Many x86 BIOS writers don't route HPET interrupts at all, which +prevents use of that mode. They support the other "legacy replacement" +mode where the first two comparators block interrupts from 8254 timers +and from the RTC. The driver supports detection of HPET driver allocation and initialization of the HPET before the driver module_init routine is called. This enables platform code which uses timer 0 or 1 as the main timer to intercept HPET initialization. An example of this initialization can be found in -arch/i386/kernel/time_hpet.c. +arch/x86/kernel/hpet.c. -The driver provides two APIs which are very similar to the API found in -the rtc.c driver. There is a user space API and a kernel space API. -An example user space program is provided below. +The driver provides a userspace API which resembles the API found in the +RTC driver framework. An example user space program is provided below. #include #include @@ -286,15 +297,3 @@ out: return; } - -The kernel API has three interfaces exported from the driver: - - hpet_register(struct hpet_task *tp, int periodic) - hpet_unregister(struct hpet_task *tp) - hpet_control(struct hpet_task *tp, unsigned int cmd, unsigned long arg) - -The kernel module using this interface fills in the ht_func and ht_data -members of the hpet_task structure before calling hpet_register. -hpet_control simply vectors to the hpet_ioctl routine and has the same -commands and respective arguments as the user API. hpet_unregister -is used to terminate usage of the HPET timer reserved by hpet_register. diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index ad2b15a..82d4591 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -115,13 +115,17 @@ static void hpet_reserve_platform_timers(unsigned long id) hd.hd_phys_address = hpet_address; hd.hd_address = hpet; hd.hd_nirqs = nrtimers; - hd.hd_flags = HPET_DATA_PLATFORM; hpet_reserve_timer(&hd, 0); #ifdef CONFIG_HPET_EMULATE_RTC hpet_reserve_timer(&hd, 1); #endif + /* + * NOTE that hd_irq[] reflects IOAPIC input pins (LEGACY_8254 + * is wrong for i8259!) not the output IRQ. Many BIOS writers + * don't bother configuring *any* comparator interrupts. + */ hd.hd_irq[0] = HPET_LEGACY_8254; hd.hd_irq[1] = HPET_LEGACY_RTC; diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c index f3981ff..4bc1da4 100644 --- a/drivers/char/hpet.c +++ b/drivers/char/hpet.c @@ -53,6 +53,11 @@ #define HPET_RANGE_SIZE 1024 /* from HPET spec */ + +/* WARNING -- don't get confused. These macros are never used + * to write the (single) counter, and rarely to read it. + * They're badly named; to fix, someday. + */ #if BITS_PER_LONG == 64 #define write_counter(V, MC) writeq(V, MC) #define read_counter(MC) readq(MC) @@ -77,7 +82,7 @@ static struct clocksource clocksource_hpet = { .rating = 250, .read = read_hpet, .mask = CLOCKSOURCE_MASK(64), - .mult = 0, /*to be caluclated*/ + .mult = 0, /* to be calculated */ .shift = 10, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -86,8 +91,6 @@ static struct clocksource *hpet_clocksource; /* A lock for concurrent access by app and isr hpet activity. */ static DEFINE_SPINLOCK(hpet_lock); -/* A lock for concurrent intermodule access to hpet and isr hpet activity. */ -static DEFINE_SPINLOCK(hpet_task_lock); #define HPET_DEV_NAME (7) @@ -99,7 +102,6 @@ struct hpet_dev { unsigned long hd_irqdata; wait_queue_head_t hd_waitqueue; struct fasync_struct *hd_async_queue; - struct hpet_task *hd_task; unsigned int hd_flags; unsigned int hd_irq; unsigned int hd_hdwirq; @@ -173,11 +175,6 @@ static irqreturn_t hpet_interrupt(int irq, void *data) writel(isr, &devp->hd_hpet->hpet_isr); spin_unlock(&hpet_lock); - spin_lock(&hpet_task_lock); - if (devp->hd_task) - devp->hd_task->ht_func(devp->hd_task->ht_data); - spin_unlock(&hpet_task_lock); - wake_up_interruptible(&devp->hd_waitqueue); kill_fasync(&devp->hd_async_queue, SIGIO, POLL_IN); @@ -260,8 +257,7 @@ static int hpet_open(struct inode *inode, struct file *file) for (devp = NULL, hpetp = hpets; hpetp && !devp; hpetp = hpetp->hp_next) for (i = 0; i < hpetp->hp_ntimer; i++) - if (hpetp->hp_dev[i].hd_flags & HPET_OPEN - || hpetp->hp_dev[i].hd_task) + if (hpetp->hp_dev[i].hd_flags & HPET_OPEN) continue; else { devp = &hpetp->hp_dev[i]; @@ -504,7 +500,11 @@ static int hpet_ioctl_ieon(struct hpet_dev *devp) devp->hd_irq = irq; t = devp->hd_ireqfreq; v = readq(&timer->hpet_config); - g = v | Tn_INT_ENB_CNF_MASK; + + /* 64-bit comparators are not yet supported through the ioctls, + * so force this into 32-bit mode if it supports both modes + */ + g = v | Tn_32MODE_CNF_MASK | Tn_INT_ENB_CNF_MASK; if (devp->hd_flags & HPET_PERIODIC) { write_counter(t, &timer->hpet_compare); @@ -514,6 +514,12 @@ static int hpet_ioctl_ieon(struct hpet_dev *devp) v |= Tn_VAL_SET_CNF_MASK; writeq(v, &timer->hpet_config); local_irq_save(flags); + + /* NOTE: what we modify here is a hidden accumulator + * register supported by periodic-capable comparators. + * We never want to modify the (single) counter; that + * would affect all the comparators. + */ m = read_counter(&hpet->hpet_mc); write_counter(t + m + hpetp->hp_delta, &timer->hpet_compare); } else { @@ -667,57 +673,6 @@ static int hpet_is_known(struct hpet_data *hdp) return 0; } -static inline int hpet_tpcheck(struct hpet_task *tp) -{ - struct hpet_dev *devp; - struct hpets *hpetp; - - devp = tp->ht_opaque; - - if (!devp) - return -ENXIO; - - for (hpetp = hpets; hpetp; hpetp = hpetp->hp_next) - if (devp >= hpetp->hp_dev - && devp < (hpetp->hp_dev + hpetp->hp_ntimer) - && devp->hd_hpet == hpetp->hp_hpet) - return 0; - - return -ENXIO; -} - -#if 0 -int hpet_unregister(struct hpet_task *tp) -{ - struct hpet_dev *devp; - struct hpet_timer __iomem *timer; - int err; - - if ((err = hpet_tpcheck(tp))) - return err; - - spin_lock_irq(&hpet_task_lock); - spin_lock(&hpet_lock); - - devp = tp->ht_opaque; - if (devp->hd_task != tp) { - spin_unlock(&hpet_lock); - spin_unlock_irq(&hpet_task_lock); - return -ENXIO; - } - - timer = devp->hd_timer; - writeq((readq(&timer->hpet_config) & ~Tn_INT_ENB_CNF_MASK), - &timer->hpet_config); - devp->hd_flags &= ~(HPET_IE | HPET_PERIODIC); - devp->hd_task = NULL; - spin_unlock(&hpet_lock); - spin_unlock_irq(&hpet_task_lock); - - return 0; -} -#endif /* 0 */ - static ctl_table hpet_table[] = { { .ctl_name = CTL_UNNUMBERED, @@ -872,9 +827,12 @@ int hpet_alloc(struct hpet_data *hdp) printk("%s %d", i > 0 ? "," : "", hdp->hd_irq[i]); printk("\n"); - printk(KERN_INFO "hpet%u: %u %d-bit timers, %Lu Hz\n", - hpetp->hp_which, hpetp->hp_ntimer, - cap & HPET_COUNTER_SIZE_MASK ? 64 : 32, hpetp->hp_tick_freq); + printk(KERN_INFO + "hpet%u: %u comparators, %d-bit %u.%06u MHz counter\n", + hpetp->hp_which, hpetp->hp_ntimer, + cap & HPET_COUNTER_SIZE_MASK ? 64 : 32, + (unsigned) (hpetp->hp_tick_freq / 1000000), + (unsigned) (hpetp->hp_tick_freq % 1000000)); mcfg = readq(&hpet->hpet_config); if ((mcfg & HPET_ENABLE_CNF_MASK) == 0) { diff --git a/include/linux/hpet.h b/include/linux/hpet.h index 6d2626b..79f63a2 100644 --- a/include/linux/hpet.h +++ b/include/linux/hpet.h @@ -92,23 +92,14 @@ struct hpet { * exported interfaces */ -struct hpet_task { - void (*ht_func) (void *); - void *ht_data; - void *ht_opaque; -}; - struct hpet_data { unsigned long hd_phys_address; void __iomem *hd_address; unsigned short hd_nirqs; - unsigned short hd_flags; unsigned int hd_state; /* timer allocated */ unsigned int hd_irq[HPET_MAX_TIMERS]; }; -#define HPET_DATA_PLATFORM 0x0001 /* platform call to hpet_alloc */ - static inline void hpet_reserve_timer(struct hpet_data *hd, int timer) { hd->hd_state |= (1 << timer); @@ -126,7 +117,7 @@ struct hpet_info { unsigned short hi_timer; }; -#define HPET_INFO_PERIODIC 0x0001 /* timer is periodic */ +#define HPET_INFO_PERIODIC 0x0010 /* periodic-capable comparator */ #define HPET_IE_ON _IO('h', 0x01) /* interrupt on */ #define HPET_IE_OFF _IO('h', 0x02) /* interrupt off */ -- cgit v0.10.2 From f92a789d259eb95afe7498ff5938fe2a93d39c82 Mon Sep 17 00:00:00 2001 From: David Brownell Date: Thu, 31 Jul 2008 12:59:56 -0700 Subject: hpet: /dev/hpet - fixes and cleanup, fix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix: On Thursday 31 July 2008, Ingo Molnar wrote: >   drivers/built-in.o: In function `hpet_alloc': >   : undefined reference to `__udivdi3' >   drivers/built-in.o: In function `hpet_alloc': >   : undefined reference to `__umoddi3' Signed-off-by: David Brownell Signed-off-by: Ingo Molnar diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c index 4bc1da4..2908a0e 100644 --- a/drivers/char/hpet.c +++ b/drivers/char/hpet.c @@ -764,6 +764,7 @@ int hpet_alloc(struct hpet_data *hdp) static struct hpets *last = NULL; unsigned long period; unsigned long long temp; + u32 remainder; /* * hpet_alloc can be called by platform dependent code. @@ -827,12 +828,13 @@ int hpet_alloc(struct hpet_data *hdp) printk("%s %d", i > 0 ? "," : "", hdp->hd_irq[i]); printk("\n"); + temp = hpetp->hp_tick_freq; + remainder = do_div(temp, 1000000); printk(KERN_INFO "hpet%u: %u comparators, %d-bit %u.%06u MHz counter\n", hpetp->hp_which, hpetp->hp_ntimer, cap & HPET_COUNTER_SIZE_MASK ? 64 : 32, - (unsigned) (hpetp->hp_tick_freq / 1000000), - (unsigned) (hpetp->hp_tick_freq % 1000000)); + (unsigned) temp, remainder); mcfg = readq(&hpet->hpet_config); if ((mcfg & HPET_ENABLE_CNF_MASK) == 0) { -- cgit v0.10.2 From a677f58a8c8c541bf7d02c658545084040f3708d Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 29 Jul 2008 00:37:10 -0700 Subject: x86: print per_cpu data address to make sure per_cpu data on correct node. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index f7745f9..61f3966 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -180,9 +180,16 @@ void __init setup_per_cpu_areas(void) printk(KERN_INFO "cpu %d has no node %d or node-local memory\n", cpu, node); + if (ptr) + printk(KERN_DEBUG "per cpu data for cpu%d at %016lx\n", + cpu, __pa(ptr)); } - else + else { ptr = alloc_bootmem_pages_node(NODE_DATA(node), size); + if (ptr) + printk(KERN_DEBUG "per cpu data for cpu%d on node%d at %016lx\n", + cpu, node, __pa(ptr)); + } #endif per_cpu_offset(cpu) = ptr - __per_cpu_start; memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); -- cgit v0.10.2 From a0ac87d61ba20932e54f08464d3f3439d78fa878 Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Tue, 29 Jul 2008 17:41:04 +0200 Subject: x86: AMD microcode patch loader style corrections Signed-off-by: Peter Oruba Cc: Tigran Aivazian Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 07e52be..6789530 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -47,9 +47,9 @@ MODULE_LICENSE("GPL v2"); #define UCODE_UCODE_TYPE 0x00000001 #define UCODE_MAX_SIZE (2048) -#define DEFAULT_UCODE_DATASIZE (896) /* 896 bytes */ -#define MC_HEADER_SIZE (sizeof(struct microcode_header_amd)) /* 64 bytes */ -#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) /* 960 bytes */ +#define DEFAULT_UCODE_DATASIZE (896) +#define MC_HEADER_SIZE (sizeof(struct microcode_header_amd)) +#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) #define DWSIZE (sizeof(u32)) /* For now we support a fixed ucode total size only */ #define get_totalsize(mc) \ -- cgit v0.10.2 From f516526febb75500f280def2108688d388df4e1e Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Tue, 29 Jul 2008 17:41:05 +0200 Subject: x86: Intel microcode patch loader style corrections Signed-off-by: Peter Oruba Cc: Tigran Aivazian Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index 6da4a85..a61986e 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c @@ -55,8 +55,8 @@ * in a single CPU package. * 1.10 28 Feb 2002 Asit K Mallick and * Tigran Aivazian , - * Serialize updates as required on HT processors due to speculative - * nature of implementation. + * Serialize updates as required on HT processors due to + * speculative nature of implementation. * 1.11 22 Mar 2002 Tigran Aivazian * Fix the panic when writing zero-length microcode chunk. * 1.12 29 Sep 2003 Nitin Kamble , @@ -71,7 +71,7 @@ * Thanks to Stuart Swales for pointing out this bug. */ -//#define DEBUG /* pr_debug */ +/* #define DEBUG */ /* pr_debug */ #include #include #include @@ -99,11 +99,11 @@ MODULE_DESCRIPTION("Microcode Update Driver"); MODULE_AUTHOR("Tigran Aivazian "); MODULE_LICENSE("GPL"); -#define DEFAULT_UCODE_DATASIZE (2000) /* 2000 bytes */ -#define MC_HEADER_SIZE (sizeof(struct microcode_header_intel)) /* 48 bytes */ -#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) /* 2048 bytes */ -#define EXT_HEADER_SIZE (sizeof(struct extended_sigtable)) /* 20 bytes */ -#define EXT_SIGNATURE_SIZE (sizeof(struct extended_signature)) /* 12 bytes */ +#define DEFAULT_UCODE_DATASIZE (2000) +#define MC_HEADER_SIZE (sizeof(struct microcode_header_intel)) +#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) +#define EXT_HEADER_SIZE (sizeof(struct extended_sigtable)) +#define EXT_SIGNATURE_SIZE (sizeof(struct extended_signature)) #define DWSIZE (sizeof(u32)) #define get_totalsize(mc) \ (((struct microcode_intel *)mc)->hdr.totalsize ? \ -- cgit v0.10.2 From 831f9bd3151ebd22959a0cb2a20b44a06b26d4ed Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Tue, 29 Jul 2008 17:41:06 +0200 Subject: x86: moved function declarations out from AMD microcode patch loader to heade file Signed-off-by: Peter Oruba Cc: Peter Oruba Cc: Tigran Aivazian Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 6789530..2b98e6a 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -56,9 +56,6 @@ MODULE_LICENSE("GPL v2"); ((((struct microcode_amd *)mc)->hdr.mc_patch_data_len * 28) \ + MC_HEADER_SIZE) -extern int microcode_init(void *opaque, struct module *module); -extern void microcode_exit(void); - /* serialize access to the physical write */ static DEFINE_SPINLOCK(microcode_update_lock); -- cgit v0.10.2 From daa9c0fee1cbe1d49fbe29af3d4bb16312043b1e Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Tue, 29 Jul 2008 17:41:07 +0200 Subject: x86: minor pointer type cast in AMD microcode patch loader Signed-off-by: Peter Oruba Cc: Peter Oruba Cc: Tigran Aivazian Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 2b98e6a..33b2a21 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -394,7 +394,8 @@ static int cpu_request_microcode_amd(int cpu) return error; } - buf_pos = buf = firmware->data; + buf_pos = (unsigned int *)firmware->data; + buf = (void *)firmware->data; size = firmware->size; if (buf_pos[0] != UCODE_MAGIC) { -- cgit v0.10.2 From 7ab6af7ab69df8c9c5fbc380004fb81187742096 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Wed, 30 Jul 2008 17:36:48 -0700 Subject: x86_32: use apic_ops at print_local_APIC() Use apic_icr_read at print_local_APIC() in io_apic_32.c Signed-off-by: Hiroshi Shimamoto Cc: Suresh Siddha Cc: Yinghai Lu Signed-off-by: Ingo Molnar Cc: Suresh Siddha Cc: Yinghai Lu diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 98e4db53..39e063a 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -1486,6 +1486,7 @@ static void print_APIC_bitfield(int base) void /*__init*/ print_local_APIC(void *dummy) { unsigned int v, ver, maxlvt; + u64 icr; if (apic_verbosity == APIC_QUIET) return; @@ -1536,10 +1537,9 @@ void /*__init*/ print_local_APIC(void *dummy) printk(KERN_DEBUG "... APIC ESR: %08x\n", v); } - v = apic_read(APIC_ICR); - printk(KERN_DEBUG "... APIC ICR: %08x\n", v); - v = apic_read(APIC_ICR2); - printk(KERN_DEBUG "... APIC ICR2: %08x\n", v); + icr = apic_icr_read(); + printk(KERN_DEBUG "... APIC ICR: %08x\n", icr); + printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32); v = apic_read(APIC_LVTT); printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); -- cgit v0.10.2 From 53efec9513cfb1acff602c7ebdd945d677808e9e Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Mon, 28 Jul 2008 19:44:05 +0200 Subject: pcmcia: only copy CIS override data once Instead of copying CIS override data in socket_sysfs.c or ds.c, and then again in cistpl.c, only do so once. Also, cisdump_t is now only used by the deprecated ioctl. Signed-off-by: Dominik Brodowski diff --git a/drivers/pcmcia/cistpl.c b/drivers/pcmcia/cistpl.c index 65129b5..11c473c 100644 --- a/drivers/pcmcia/cistpl.c +++ b/drivers/pcmcia/cistpl.c @@ -265,13 +265,13 @@ EXPORT_SYMBOL(pcmcia_write_cis_mem); ======================================================================*/ static void read_cis_cache(struct pcmcia_socket *s, int attr, u_int addr, - u_int len, void *ptr) + size_t len, void *ptr) { struct cis_cache_entry *cis; int ret; if (s->fake_cis) { - if (s->fake_cis_len > addr+len) + if (s->fake_cis_len >= addr+len) memcpy(ptr, s->fake_cis+addr, len); else memset(ptr, 0xff, len); @@ -380,17 +380,17 @@ int verify_cis_cache(struct pcmcia_socket *s) ======================================================================*/ -int pcmcia_replace_cis(struct pcmcia_socket *s, cisdump_t *cis) +int pcmcia_replace_cis(struct pcmcia_socket *s, + const u8 *data, const size_t len) { - kfree(s->fake_cis); - s->fake_cis = NULL; - if (cis->Length > CISTPL_MAX_CIS_SIZE) + if (len > CISTPL_MAX_CIS_SIZE) return CS_BAD_SIZE; - s->fake_cis = kmalloc(cis->Length, GFP_KERNEL); + kfree(s->fake_cis); + s->fake_cis = kmalloc(len, GFP_KERNEL); if (s->fake_cis == NULL) return CS_OUT_OF_RESOURCE; - s->fake_cis_len = cis->Length; - memcpy(s->fake_cis, cis->Data, cis->Length); + s->fake_cis_len = len; + memcpy(s->fake_cis, data, len); return CS_SUCCESS; } EXPORT_SYMBOL(pcmcia_replace_cis); diff --git a/drivers/pcmcia/ds.c b/drivers/pcmcia/ds.c index 4174d96..2382341 100644 --- a/drivers/pcmcia/ds.c +++ b/drivers/pcmcia/ds.c @@ -854,7 +854,6 @@ static int pcmcia_load_firmware(struct pcmcia_device *dev, char * filename) int ret = -ENOMEM; int no_funcs; int old_funcs; - cisdump_t *cis; cistpl_longlink_mfc_t mfc; if (!filename) @@ -877,16 +876,7 @@ static int pcmcia_load_firmware(struct pcmcia_device *dev, char * filename) goto release; } - cis = kzalloc(sizeof(cisdump_t), GFP_KERNEL); - if (!cis) { - ret = -ENOMEM; - goto release; - } - - cis->Length = fw->size + 1; - memcpy(cis->Data, fw->data, fw->size); - - if (!pcmcia_replace_cis(s, cis)) + if (!pcmcia_replace_cis(s, fw->data, fw->size)) ret = 0; else { printk(KERN_ERR "pcmcia: CIS override failed\n"); diff --git a/drivers/pcmcia/pcmcia_ioctl.c b/drivers/pcmcia/pcmcia_ioctl.c index 419f97f..6c086ff 100644 --- a/drivers/pcmcia/pcmcia_ioctl.c +++ b/drivers/pcmcia/pcmcia_ioctl.c @@ -867,7 +867,7 @@ static int ds_ioctl(struct inode * inode, struct file * file, &buf->win_info.map); break; case DS_REPLACE_CIS: - ret = pcmcia_replace_cis(s, &buf->cisdump); + ret = pcmcia_replace_cis(s, buf->cisdump.Data, buf->cisdump.Length); break; case DS_BIND_REQUEST: if (!capable(CAP_SYS_ADMIN)) { diff --git a/drivers/pcmcia/socket_sysfs.c b/drivers/pcmcia/socket_sysfs.c index 006a29e..ff9a3bb 100644 --- a/drivers/pcmcia/socket_sysfs.c +++ b/drivers/pcmcia/socket_sysfs.c @@ -316,27 +316,18 @@ static ssize_t pccard_store_cis(struct kobject *kobj, char *buf, loff_t off, size_t count) { struct pcmcia_socket *s = to_socket(container_of(kobj, struct device, kobj)); - cisdump_t *cis; int error; if (off) return -EINVAL; - if (count >= 0x200) + if (count >= CISTPL_MAX_CIS_SIZE) return -EINVAL; if (!(s->state & SOCKET_PRESENT)) return -ENODEV; - cis = kzalloc(sizeof(cisdump_t), GFP_KERNEL); - if (!cis) - return -ENOMEM; - - cis->Length = count + 1; - memcpy(cis->Data, buf, count); - - error = pcmcia_replace_cis(s, cis); - kfree(cis); + error = pcmcia_replace_cis(s, buf, count); if (error) return -EIO; diff --git a/include/pcmcia/cistpl.h b/include/pcmcia/cistpl.h index e2e10c1..552a332 100644 --- a/include/pcmcia/cistpl.h +++ b/include/pcmcia/cistpl.h @@ -580,14 +580,8 @@ typedef struct cisinfo_t { #define CISTPL_MAX_CIS_SIZE 0x200 -/* For ReplaceCIS */ -typedef struct cisdump_t { - u_int Length; - cisdata_t Data[CISTPL_MAX_CIS_SIZE]; -} cisdump_t; - - -int pcmcia_replace_cis(struct pcmcia_socket *s, cisdump_t *cis); +int pcmcia_replace_cis(struct pcmcia_socket *s, + const u8 *data, const size_t len); /* don't use outside of PCMCIA core yet */ int pccard_get_next_tuple(struct pcmcia_socket *s, unsigned int func, tuple_t *tuple); diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h index b316027..2d36a4f 100644 --- a/include/pcmcia/ds.h +++ b/include/pcmcia/ds.h @@ -68,6 +68,12 @@ typedef struct region_info_t { #define REGION_BAR_MASK 0xe000 #define REGION_BAR_SHIFT 13 +/* For ReplaceCIS */ +typedef struct cisdump_t { + u_int Length; + cisdata_t Data[CISTPL_MAX_CIS_SIZE]; +} cisdump_t; + typedef union ds_ioctl_arg_t { adjust_t adjust; config_info_t config; diff --git a/include/pcmcia/ss.h b/include/pcmcia/ss.h index ed919dd..e34bef0 100644 --- a/include/pcmcia/ss.h +++ b/include/pcmcia/ss.h @@ -199,8 +199,8 @@ struct pcmcia_socket { io_window_t io[MAX_IO_WIN]; window_t win[MAX_WIN]; struct list_head cis_cache; - u_int fake_cis_len; - char *fake_cis; + size_t fake_cis_len; + u8 *fake_cis; struct list_head socket_list; struct completion socket_released; -- cgit v0.10.2 From 3583a71183a02c51ca71cd180e9189cfb0411cc1 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 22 Jul 2008 20:21:23 +0300 Subject: make selinux_write_opts() static This patch makes the needlessly global selinux_write_opts() static. Signed-off-by: Adrian Bunk Signed-off-by: James Morris diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 3ae9bec..0ffd881 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -957,7 +957,8 @@ out_err: return rc; } -void selinux_write_opts(struct seq_file *m, struct security_mnt_opts *opts) +static void selinux_write_opts(struct seq_file *m, + struct security_mnt_opts *opts) { int i; char *prefix; -- cgit v0.10.2 From df4ea865f09580b1cad621c0426612f598847815 Mon Sep 17 00:00:00 2001 From: Vesa-Matti J Kari Date: Sun, 20 Jul 2008 23:57:01 +0300 Subject: SELinux: Trivial minor fixes that change C null character style Trivial minor fixes that change C null character style. Signed-off-by: Vesa-Matti Kari Signed-off-by: James Morris diff --git a/security/selinux/ss/conditional.c b/security/selinux/ss/conditional.c index fb4efe4..f8c850a 100644 --- a/security/selinux/ss/conditional.c +++ b/security/selinux/ss/conditional.c @@ -239,7 +239,7 @@ int cond_read_bool(struct policydb *p, struct hashtab *h, void *fp) rc = next_entry(key, fp, len); if (rc < 0) goto err; - key[len] = 0; + key[len] = '\0'; if (hashtab_insert(h, key, booldatum)) goto err; diff --git a/security/selinux/ss/mls.c b/security/selinux/ss/mls.c index 77d745d..b5407f1 100644 --- a/security/selinux/ss/mls.c +++ b/security/selinux/ss/mls.c @@ -283,8 +283,8 @@ int mls_context_to_sid(struct policydb *pol, p++; delim = *p; - if (delim != 0) - *p++ = 0; + if (delim != '\0') + *p++ = '\0'; for (l = 0; l < 2; l++) { levdatum = hashtab_search(pol->p_levels.table, scontextp); @@ -302,14 +302,14 @@ int mls_context_to_sid(struct policydb *pol, while (*p && *p != ',' && *p != '-') p++; delim = *p; - if (delim != 0) - *p++ = 0; + if (delim != '\0') + *p++ = '\0'; /* Separate into range if exists */ rngptr = strchr(scontextp, '.'); if (rngptr != NULL) { /* Remove '.' */ - *rngptr++ = 0; + *rngptr++ = '\0'; } catdatum = hashtab_search(pol->p_cats.table, @@ -357,8 +357,8 @@ int mls_context_to_sid(struct policydb *pol, p++; delim = *p; - if (delim != 0) - *p++ = 0; + if (delim != '\0') + *p++ = '\0'; } else break; } diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c index 2391761..2664630 100644 --- a/security/selinux/ss/policydb.c +++ b/security/selinux/ss/policydb.c @@ -932,7 +932,7 @@ static int perm_read(struct policydb *p, struct hashtab *h, void *fp) rc = next_entry(key, fp, len); if (rc < 0) goto bad; - key[len] = 0; + key[len] = '\0'; rc = hashtab_insert(h, key, perdatum); if (rc) @@ -979,7 +979,7 @@ static int common_read(struct policydb *p, struct hashtab *h, void *fp) rc = next_entry(key, fp, len); if (rc < 0) goto bad; - key[len] = 0; + key[len] = '\0'; for (i = 0; i < nel; i++) { rc = perm_read(p, comdatum->permissions.table, fp); @@ -1117,7 +1117,7 @@ static int class_read(struct policydb *p, struct hashtab *h, void *fp) rc = next_entry(key, fp, len); if (rc < 0) goto bad; - key[len] = 0; + key[len] = '\0'; if (len2) { cladatum->comkey = kmalloc(len2 + 1, GFP_KERNEL); @@ -1128,7 +1128,7 @@ static int class_read(struct policydb *p, struct hashtab *h, void *fp) rc = next_entry(cladatum->comkey, fp, len2); if (rc < 0) goto bad; - cladatum->comkey[len2] = 0; + cladatum->comkey[len2] = '\0'; cladatum->comdatum = hashtab_search(p->p_commons.table, cladatum->comkey); @@ -1201,7 +1201,7 @@ static int role_read(struct policydb *p, struct hashtab *h, void *fp) rc = next_entry(key, fp, len); if (rc < 0) goto bad; - key[len] = 0; + key[len] = '\0'; rc = ebitmap_read(&role->dominates, fp); if (rc) @@ -1262,7 +1262,7 @@ static int type_read(struct policydb *p, struct hashtab *h, void *fp) rc = next_entry(key, fp, len); if (rc < 0) goto bad; - key[len] = 0; + key[len] = '\0'; rc = hashtab_insert(h, key, typdatum); if (rc) @@ -1334,7 +1334,7 @@ static int user_read(struct policydb *p, struct hashtab *h, void *fp) rc = next_entry(key, fp, len); if (rc < 0) goto bad; - key[len] = 0; + key[len] = '\0'; rc = ebitmap_read(&usrdatum->roles, fp); if (rc) @@ -1388,7 +1388,7 @@ static int sens_read(struct policydb *p, struct hashtab *h, void *fp) rc = next_entry(key, fp, len); if (rc < 0) goto bad; - key[len] = 0; + key[len] = '\0'; levdatum->level = kmalloc(sizeof(struct mls_level), GFP_ATOMIC); if (!levdatum->level) { @@ -1440,7 +1440,7 @@ static int cat_read(struct policydb *p, struct hashtab *h, void *fp) rc = next_entry(key, fp, len); if (rc < 0) goto bad; - key[len] = 0; + key[len] = '\0'; rc = hashtab_insert(h, key, catdatum); if (rc) @@ -1523,7 +1523,7 @@ int policydb_read(struct policydb *p, void *fp) kfree(policydb_str); goto bad; } - policydb_str[len] = 0; + policydb_str[len] = '\0'; if (strcmp(policydb_str, POLICYDB_STRING)) { printk(KERN_ERR "SELinux: policydb string %s does not match " "my string %s\n", policydb_str, POLICYDB_STRING); -- cgit v0.10.2 From 0c0e186f812457e527c420f7a4d02865fd0dc7d2 Mon Sep 17 00:00:00 2001 From: Vesa-Matti J Kari Date: Mon, 21 Jul 2008 02:50:20 +0300 Subject: SELinux: trivial, remove unneeded local variable Hello, Remove unneeded local variable: struct avtab_node *newnode Signed-off-by: Vesa-Matti Kari Signed-off-by: James Morris diff --git a/security/selinux/ss/avtab.c b/security/selinux/ss/avtab.c index a1be97f..e8ae812 100644 --- a/security/selinux/ss/avtab.c +++ b/security/selinux/ss/avtab.c @@ -98,7 +98,7 @@ struct avtab_node * avtab_insert_nonunique(struct avtab *h, struct avtab_key *key, struct avtab_datum *datum) { int hvalue; - struct avtab_node *prev, *cur, *newnode; + struct avtab_node *prev, *cur; u16 specified = key->specified & ~(AVTAB_ENABLED|AVTAB_ENABLED_OLD); if (!h || !h->htable) @@ -122,9 +122,7 @@ avtab_insert_nonunique(struct avtab *h, struct avtab_key *key, struct avtab_datu key->target_class < cur->key.target_class) break; } - newnode = avtab_insert_node(h, hvalue, prev, cur, key, datum); - - return newnode; + return avtab_insert_node(h, hvalue, prev, cur, key, datum); } struct avtab_datum *avtab_search(struct avtab *h, struct avtab_key *key) -- cgit v0.10.2 From cf9481e289247fe9cf40f2e2481220d899132049 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sun, 27 Jul 2008 21:31:07 +1000 Subject: SELinux: Fix a potentially uninitialised variable in SELinux hooks Fix a potentially uninitialised variable in SELinux hooks that's given a pointer to the network address by selinux_parse_skb() passing a pointer back through its argument list. By restructuring selinux_parse_skb(), the compiler can see that the error case need not set it as the caller will return immediately. Signed-off-by: David Howells Signed-off-by: James Morris diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 0ffd881..3eae306 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3539,38 +3539,44 @@ out: #endif /* IPV6 */ static int selinux_parse_skb(struct sk_buff *skb, struct avc_audit_data *ad, - char **addrp, int src, u8 *proto) + char **_addrp, int src, u8 *proto) { - int ret = 0; + char *addrp; + int ret; switch (ad->u.net.family) { case PF_INET: ret = selinux_parse_skb_ipv4(skb, ad, proto); - if (ret || !addrp) - break; - *addrp = (char *)(src ? &ad->u.net.v4info.saddr : - &ad->u.net.v4info.daddr); - break; + if (ret) + goto parse_error; + addrp = (char *)(src ? &ad->u.net.v4info.saddr : + &ad->u.net.v4info.daddr); + goto okay; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) case PF_INET6: ret = selinux_parse_skb_ipv6(skb, ad, proto); - if (ret || !addrp) - break; - *addrp = (char *)(src ? &ad->u.net.v6info.saddr : - &ad->u.net.v6info.daddr); - break; + if (ret) + goto parse_error; + addrp = (char *)(src ? &ad->u.net.v6info.saddr : + &ad->u.net.v6info.daddr); + goto okay; #endif /* IPV6 */ default: - break; + addrp = NULL; + goto okay; } - if (unlikely(ret)) - printk(KERN_WARNING - "SELinux: failure in selinux_parse_skb()," - " unable to parse packet\n"); - +parse_error: + printk(KERN_WARNING + "SELinux: failure in selinux_parse_skb()," + " unable to parse packet\n"); return ret; + +okay: + if (_addrp) + *_addrp = addrp; + return 0; } /** -- cgit v0.10.2 From 15446235367fa4a621ff5abfa4b6ebbe25b33763 Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Wed, 30 Jul 2008 15:37:11 -0700 Subject: smack: limit privilege by label There have been a number of requests to make the Smack LSM enforce MAC even in the face of privilege, either capability based or superuser based. This is not universally desired, however, so it seems desirable to make it optional. Further, at least one legacy OS implemented a scheme whereby only processes running with one particular label could be exempt from MAC. This patch supports these three cases. If /smack/onlycap is empty (unset or null-string) privilege is enforced in the normal way. If /smack/onlycap contains a label only processes running with that label may be MAC exempt. If the label in /smack/onlycap is the star label ("*") the semantics of the star label combine with the privilege restrictions to prevent any violations of MAC, even in the presence of privilege. Again, this will be independent of the privilege scheme. Signed-off-by: Casey Schaufler Reviewed-by: James Morris diff --git a/security/smack/smack.h b/security/smack/smack.h index 4a4477f..31dce55 100644 --- a/security/smack/smack.h +++ b/security/smack/smack.h @@ -178,6 +178,7 @@ u32 smack_to_secid(const char *); extern int smack_cipso_direct; extern int smack_net_nltype; extern char *smack_net_ambient; +extern char *smack_onlycap; extern struct smack_known *smack_known; extern struct smack_known smack_known_floor; diff --git a/security/smack/smack_access.c b/security/smack/smack_access.c index f6b5f6e..79ff21e 100644 --- a/security/smack/smack_access.c +++ b/security/smack/smack_access.c @@ -157,7 +157,7 @@ int smk_access(char *subject_label, char *object_label, int request) * * This function checks the current subject label/object label pair * in the access rule list and returns 0 if the access is permitted, - * non zero otherwise. It allows that current my have the capability + * non zero otherwise. It allows that current may have the capability * to override the rules. */ int smk_curacc(char *obj_label, u32 mode) @@ -168,6 +168,14 @@ int smk_curacc(char *obj_label, u32 mode) if (rc == 0) return 0; + /* + * Return if a specific label has been designated as the + * only one that gets privilege and current does not + * have that label. + */ + if (smack_onlycap != NULL && smack_onlycap != current->security) + return rc; + if (capable(CAP_MAC_OVERRIDE)) return 0; diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c index 271a835..e7c6424 100644 --- a/security/smack/smackfs.c +++ b/security/smack/smackfs.c @@ -39,6 +39,7 @@ enum smk_inos { SMK_DIRECT = 6, /* CIPSO level indicating direct label */ SMK_AMBIENT = 7, /* internet ambient label */ SMK_NLTYPE = 8, /* label scheme to use by default */ + SMK_ONLYCAP = 9, /* the only "capable" label */ }; /* @@ -68,6 +69,16 @@ int smack_net_nltype = NETLBL_NLTYPE_CIPSOV4; */ int smack_cipso_direct = SMACK_CIPSO_DIRECT_DEFAULT; +/* + * Unless a process is running with this label even + * having CAP_MAC_OVERRIDE isn't enough to grant + * privilege to violate MAC policy. If no label is + * designated (the NULL case) capabilities apply to + * everyone. It is expected that the hat (^) label + * will be used if any label is used. + */ +char *smack_onlycap; + static int smk_cipso_doi_value = SMACK_CIPSO_DOI_DEFAULT; struct smk_list_entry *smack_list; @@ -787,6 +798,85 @@ static const struct file_operations smk_ambient_ops = { .write = smk_write_ambient, }; +/** + * smk_read_onlycap - read() for /smack/onlycap + * @filp: file pointer, not actually used + * @buf: where to put the result + * @cn: maximum to send along + * @ppos: where to start + * + * Returns number of bytes read or error code, as appropriate + */ +static ssize_t smk_read_onlycap(struct file *filp, char __user *buf, + size_t cn, loff_t *ppos) +{ + char *smack = ""; + ssize_t rc = -EINVAL; + int asize; + + if (*ppos != 0) + return 0; + + if (smack_onlycap != NULL) + smack = smack_onlycap; + + asize = strlen(smack) + 1; + + if (cn >= asize) + rc = simple_read_from_buffer(buf, cn, ppos, smack, asize); + + return rc; +} + +/** + * smk_write_onlycap - write() for /smack/onlycap + * @filp: file pointer, not actually used + * @buf: where to get the data from + * @count: bytes sent + * @ppos: where to start + * + * Returns number of bytes written or error code, as appropriate + */ +static ssize_t smk_write_onlycap(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + char in[SMK_LABELLEN]; + char *sp = current->security; + + if (!capable(CAP_MAC_ADMIN)) + return -EPERM; + + /* + * This can be done using smk_access() but is done + * explicitly for clarity. The smk_access() implementation + * would use smk_access(smack_onlycap, MAY_WRITE) + */ + if (smack_onlycap != NULL && smack_onlycap != sp) + return -EPERM; + + if (count >= SMK_LABELLEN) + return -EINVAL; + + if (copy_from_user(in, buf, count) != 0) + return -EFAULT; + + /* + * Should the null string be passed in unset the onlycap value. + * This seems like something to be careful with as usually + * smk_import only expects to return NULL for errors. It + * is usually the case that a nullstring or "\n" would be + * bad to pass to smk_import but in fact this is useful here. + */ + smack_onlycap = smk_import(in, count); + + return count; +} + +static const struct file_operations smk_onlycap_ops = { + .read = smk_read_onlycap, + .write = smk_write_onlycap, +}; + struct option_names { int o_number; char *o_name; @@ -919,6 +1009,8 @@ static int smk_fill_super(struct super_block *sb, void *data, int silent) {"ambient", &smk_ambient_ops, S_IRUGO|S_IWUSR}, [SMK_NLTYPE] = {"nltype", &smk_nltype_ops, S_IRUGO|S_IWUSR}, + [SMK_ONLYCAP] = + {"onlycap", &smk_onlycap_ops, S_IRUGO|S_IWUSR}, /* last one */ {""} }; -- cgit v0.10.2 From 421fae06be9e0dac45747494756b3580643815f9 Mon Sep 17 00:00:00 2001 From: Vesa-Matti Kari Date: Wed, 6 Aug 2008 18:24:51 +0300 Subject: selinux: conditional expression type validation was off-by-one expr_isvalid() in conditional.c was off-by-one and allowed invalid expression type COND_LAST. However, it is this header file that needs to be fixed. That way the if-statement's disjunction's second component reads more naturally, "if expr type is greater than the last allowed value" ( rather than using ">=" in conditional.c): if (expr->expr_type <= 0 || expr->expr_type > COND_LAST) Signed-off-by: Vesa-Matti Kari Signed-off-by: James Morris diff --git a/security/selinux/ss/conditional.h b/security/selinux/ss/conditional.h index 65b9f83..53ddb01 100644 --- a/security/selinux/ss/conditional.h +++ b/security/selinux/ss/conditional.h @@ -28,7 +28,7 @@ struct cond_expr { #define COND_XOR 5 /* bool ^ bool */ #define COND_EQ 6 /* bool == bool */ #define COND_NEQ 7 /* bool != bool */ -#define COND_LAST 8 +#define COND_LAST COND_NEQ __u32 expr_type; __u32 bool; struct cond_expr *next; -- cgit v0.10.2 From c04148f915e5ba7947752e6348e0da4cdab1329e Mon Sep 17 00:00:00 2001 From: "Alfred E. Heggestad" Date: Fri, 8 Aug 2008 11:49:08 -0400 Subject: Input: add driver for USB VoIP phones with CM109 chipset Signed-off-by: Alfred E. Heggestad Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/misc/Kconfig b/drivers/input/misc/Kconfig index e99b788..199055d 100644 --- a/drivers/input/misc/Kconfig +++ b/drivers/input/misc/Kconfig @@ -180,6 +180,19 @@ config INPUT_YEALINK To compile this driver as a module, choose M here: the module will be called yealink. +config INPUT_CM109 + tristate "C-Media CM109 USB I/O Controller" + depends on EXPERIMENTAL + depends on USB_ARCH_HAS_HCD + select USB + help + Say Y here if you want to enable keyboard and buzzer functions of the + C-Media CM109 usb phones. The audio part is enabled by the generic + usb sound driver, so you might want to enable that as well. + + To compile this driver as a module, choose M here: the module will be + called cm109. + config INPUT_UINPUT tristate "User level driver support" help diff --git a/drivers/input/misc/Makefile b/drivers/input/misc/Makefile index f48009b..d7db2ae 100644 --- a/drivers/input/misc/Makefile +++ b/drivers/input/misc/Makefile @@ -16,6 +16,7 @@ obj-$(CONFIG_INPUT_ATI_REMOTE2) += ati_remote2.o obj-$(CONFIG_INPUT_KEYSPAN_REMOTE) += keyspan_remote.o obj-$(CONFIG_INPUT_POWERMATE) += powermate.o obj-$(CONFIG_INPUT_YEALINK) += yealink.o +obj-$(CONFIG_INPUT_CM109) += cm109.o obj-$(CONFIG_HP_SDC_RTC) += hp_sdc_rtc.o obj-$(CONFIG_INPUT_UINPUT) += uinput.o obj-$(CONFIG_INPUT_APANEL) += apanel.o diff --git a/drivers/input/misc/cm109.c b/drivers/input/misc/cm109.c new file mode 100644 index 0000000..404fd49 --- /dev/null +++ b/drivers/input/misc/cm109.c @@ -0,0 +1,884 @@ +/* + * Driver for the VoIP USB phones with CM109 chipsets. + * + * Copyright (C) 2007 - 2008 Alfred E. Heggestad + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, version 2. + */ + +/* + * Tested devices: + * - Komunikate KIP1000 + * - Genius G-talk + * - Allied-Telesis Corega USBPH01 + * - ... + * + * This driver is based on the yealink.c driver + * + * Thanks to: + * - Authors of yealink.c + * - Thomas Reitmayr + * - Oliver Neukum for good review comments and code + * - Shaun Jackman for Genius G-talk keymap + * - Dmitry Torokhov for valuable input and review + * + * Todo: + * - Read/write EEPROM + */ + +#include +#include +#include +#include +#include +#include +#include + +#define CM109_DEBUG 0 + +#define DRIVER_VERSION "20080805" +#define DRIVER_AUTHOR "Alfred E. Heggestad" +#define DRIVER_DESC "CM109 phone driver" + +static char *phone = "kip1000"; +module_param(phone, charp, S_IRUSR); +MODULE_PARM_DESC(phone, "Phone name {kip1000, gtalk, usbph01}"); + +enum { + /* HID Registers */ + HID_IR0 = 0x00, /* Record/Playback-mute button, Volume up/down */ + HID_IR1 = 0x01, /* GPI, generic registers or EEPROM_DATA0 */ + HID_IR2 = 0x02, /* Generic registers or EEPROM_DATA1 */ + HID_IR3 = 0x03, /* Generic registers or EEPROM_CTRL */ + HID_OR0 = 0x00, /* Mapping control, buzzer, SPDIF (offset 0x04) */ + HID_OR1 = 0x01, /* GPO - General Purpose Output */ + HID_OR2 = 0x02, /* Set GPIO to input/output mode */ + HID_OR3 = 0x03, /* SPDIF status channel or EEPROM_CTRL */ + + /* HID_IR0 */ + RECORD_MUTE = 1 << 3, + PLAYBACK_MUTE = 1 << 2, + VOLUME_DOWN = 1 << 1, + VOLUME_UP = 1 << 0, + + /* HID_OR0 */ + /* bits 7-6 + 0: HID_OR1-2 are used for GPO; HID_OR0, 3 are used for buzzer + and SPDIF + 1: HID_OR0-3 are used as generic HID registers + 2: Values written to HID_OR0-3 are also mapped to MCU_CTRL, + EEPROM_DATA0-1, EEPROM_CTRL (see Note) + 3: Reserved + */ + HID_OR_GPO_BUZ_SPDIF = 0 << 6, + HID_OR_GENERIC_HID_REG = 1 << 6, + HID_OR_MAP_MCU_EEPROM = 2 << 6, + + BUZZER_ON = 1 << 5, + + /* up to 256 normal keys, up to 16 special keys */ + KEYMAP_SIZE = 256 + 16, +}; + +/* CM109 protocol packet */ +struct cm109_ctl_packet { + u8 byte[4]; +} __attribute__ ((packed)); + +enum { USB_PKT_LEN = sizeof(struct cm109_ctl_packet) }; + +/* CM109 device structure */ +struct cm109_dev { + struct input_dev *idev; /* input device */ + struct usb_device *udev; /* usb device */ + struct usb_interface *intf; + + /* irq input channel */ + struct cm109_ctl_packet *irq_data; + dma_addr_t irq_dma; + struct urb *urb_irq; + + /* control output channel */ + struct cm109_ctl_packet *ctl_data; + dma_addr_t ctl_dma; + struct usb_ctrlrequest *ctl_req; + dma_addr_t ctl_req_dma; + struct urb *urb_ctl; + /* + * The 3 bitfields below are protected by ctl_submit_lock. + * They have to be separate since they are accessed from IRQ + * context. + */ + unsigned irq_urb_pending:1; /* irq_urb is in flight */ + unsigned ctl_urb_pending:1; /* ctl_urb is in flight */ + unsigned buzzer_pending:1; /* need to issue buzz command */ + spinlock_t ctl_submit_lock; + + unsigned char buzzer_state; /* on/off */ + + /* flags */ + unsigned open:1; + unsigned resetting:1; + unsigned shutdown:1; + + /* This mutex protects writes to the above flags */ + struct mutex pm_mutex; + + unsigned short keymap[KEYMAP_SIZE]; + + char phys[64]; /* physical device path */ + int key_code; /* last reported key */ + int keybit; /* 0=new scan 1,2,4,8=scan columns */ + u8 gpi; /* Cached value of GPI (high nibble) */ +}; + +/****************************************************************************** + * CM109 key interface + *****************************************************************************/ + +static unsigned short special_keymap(int code) +{ + if (code > 0xff) { + switch (code - 0xff) { + case RECORD_MUTE: return KEY_MUTE; + case PLAYBACK_MUTE: return KEY_MUTE; + case VOLUME_DOWN: return KEY_VOLUMEDOWN; + case VOLUME_UP: return KEY_VOLUMEUP; + } + } + return KEY_RESERVED; +} + +/* Map device buttons to internal key events. + * + * The "up" and "down" keys, are symbolised by arrows on the button. + * The "pickup" and "hangup" keys are symbolised by a green and red phone + * on the button. + + Komunikate KIP1000 Keyboard Matrix + + -> -- 1 -- 2 -- 3 --> GPI pin 4 (0x10) + | | | | + <- -- 4 -- 5 -- 6 --> GPI pin 5 (0x20) + | | | | + END - 7 -- 8 -- 9 --> GPI pin 6 (0x40) + | | | | + OK -- * -- 0 -- # --> GPI pin 7 (0x80) + | | | | + + /|\ /|\ /|\ /|\ + | | | | +GPO +pin: 3 2 1 0 + 0x8 0x4 0x2 0x1 + + */ +static unsigned short keymap_kip1000(int scancode) +{ + switch (scancode) { /* phone key: */ + case 0x82: return KEY_NUMERIC_0; /* 0 */ + case 0x14: return KEY_NUMERIC_1; /* 1 */ + case 0x12: return KEY_NUMERIC_2; /* 2 */ + case 0x11: return KEY_NUMERIC_3; /* 3 */ + case 0x24: return KEY_NUMERIC_4; /* 4 */ + case 0x22: return KEY_NUMERIC_5; /* 5 */ + case 0x21: return KEY_NUMERIC_6; /* 6 */ + case 0x44: return KEY_NUMERIC_7; /* 7 */ + case 0x42: return KEY_NUMERIC_8; /* 8 */ + case 0x41: return KEY_NUMERIC_9; /* 9 */ + case 0x81: return KEY_NUMERIC_POUND; /* # */ + case 0x84: return KEY_NUMERIC_STAR; /* * */ + case 0x88: return KEY_ENTER; /* pickup */ + case 0x48: return KEY_ESC; /* hangup */ + case 0x28: return KEY_LEFT; /* IN */ + case 0x18: return KEY_RIGHT; /* OUT */ + default: return special_keymap(scancode); + } +} + +/* + Contributed by Shaun Jackman + + Genius G-Talk keyboard matrix + 0 1 2 3 + 4: 0 4 8 Talk + 5: 1 5 9 End + 6: 2 6 # Up + 7: 3 7 * Down +*/ +static unsigned short keymap_gtalk(int scancode) +{ + switch (scancode) { + case 0x11: return KEY_NUMERIC_0; + case 0x21: return KEY_NUMERIC_1; + case 0x41: return KEY_NUMERIC_2; + case 0x81: return KEY_NUMERIC_3; + case 0x12: return KEY_NUMERIC_4; + case 0x22: return KEY_NUMERIC_5; + case 0x42: return KEY_NUMERIC_6; + case 0x82: return KEY_NUMERIC_7; + case 0x14: return KEY_NUMERIC_8; + case 0x24: return KEY_NUMERIC_9; + case 0x44: return KEY_NUMERIC_POUND; /* # */ + case 0x84: return KEY_NUMERIC_STAR; /* * */ + case 0x18: return KEY_ENTER; /* Talk (green handset) */ + case 0x28: return KEY_ESC; /* End (red handset) */ + case 0x48: return KEY_UP; /* Menu up (rocker switch) */ + case 0x88: return KEY_DOWN; /* Menu down (rocker switch) */ + default: return special_keymap(scancode); + } +} + +/* + * Keymap for Allied-Telesis Corega USBPH01 + * http://www.alliedtelesis-corega.com/2/1344/1437/1360/chprd.html + * + * Contributed by july@nat.bg + */ +static unsigned short keymap_usbph01(int scancode) +{ + switch (scancode) { + case 0x11: return KEY_NUMERIC_0; /* 0 */ + case 0x21: return KEY_NUMERIC_1; /* 1 */ + case 0x41: return KEY_NUMERIC_2; /* 2 */ + case 0x81: return KEY_NUMERIC_3; /* 3 */ + case 0x12: return KEY_NUMERIC_4; /* 4 */ + case 0x22: return KEY_NUMERIC_5; /* 5 */ + case 0x42: return KEY_NUMERIC_6; /* 6 */ + case 0x82: return KEY_NUMERIC_7; /* 7 */ + case 0x14: return KEY_NUMERIC_8; /* 8 */ + case 0x24: return KEY_NUMERIC_9; /* 9 */ + case 0x44: return KEY_NUMERIC_POUND; /* # */ + case 0x84: return KEY_NUMERIC_STAR; /* * */ + case 0x18: return KEY_ENTER; /* pickup */ + case 0x28: return KEY_ESC; /* hangup */ + case 0x48: return KEY_LEFT; /* IN */ + case 0x88: return KEY_RIGHT; /* OUT */ + default: return special_keymap(scancode); + } +} + +static unsigned short (*keymap)(int) = keymap_kip1000; + +/* + * Completes a request by converting the data into events for the + * input subsystem. + */ +static void report_key(struct cm109_dev *dev, int key) +{ + struct input_dev *idev = dev->idev; + + if (dev->key_code >= 0) { + /* old key up */ + input_report_key(idev, dev->key_code, 0); + } + + dev->key_code = key; + if (key >= 0) { + /* new valid key */ + input_report_key(idev, key, 1); + } + + input_sync(idev); +} + +/****************************************************************************** + * CM109 usb communication interface + *****************************************************************************/ + +static void cm109_submit_buzz_toggle(struct cm109_dev *dev) +{ + int error; + + if (dev->buzzer_state) + dev->ctl_data->byte[HID_OR0] |= BUZZER_ON; + else + dev->ctl_data->byte[HID_OR0] &= ~BUZZER_ON; + + error = usb_submit_urb(dev->urb_ctl, GFP_ATOMIC); + if (error) + err("%s: usb_submit_urb (urb_ctl) failed %d", __func__, error); +} + +/* + * IRQ handler + */ +static void cm109_urb_irq_callback(struct urb *urb) +{ + struct cm109_dev *dev = urb->context; + const int status = urb->status; + int error; + +#if CM109_DEBUG + info("### URB IRQ: [0x%02x 0x%02x 0x%02x 0x%02x] keybit=0x%02x", + dev->irq_data->byte[0], + dev->irq_data->byte[1], + dev->irq_data->byte[2], + dev->irq_data->byte[3], + dev->keybit); +#endif + + if (status) { + if (status == -ESHUTDOWN) + return; + err("%s: urb status %d", __func__, status); + } + + /* Special keys */ + if (dev->irq_data->byte[HID_IR0] & 0x0f) { + const int code = (dev->irq_data->byte[HID_IR0] & 0x0f); + report_key(dev, dev->keymap[0xff + code]); + } + + /* Scan key column */ + if (dev->keybit == 0xf) { + + /* Any changes ? */ + if ((dev->gpi & 0xf0) == (dev->irq_data->byte[HID_IR1] & 0xf0)) + goto out; + + dev->gpi = dev->irq_data->byte[HID_IR1] & 0xf0; + dev->keybit = 0x1; + } else { + report_key(dev, dev->keymap[dev->irq_data->byte[HID_IR1]]); + + dev->keybit <<= 1; + if (dev->keybit > 0x8) + dev->keybit = 0xf; + } + + out: + + spin_lock(&dev->ctl_submit_lock); + + dev->irq_urb_pending = 0; + + if (likely(!dev->shutdown)) { + + if (dev->buzzer_state) + dev->ctl_data->byte[HID_OR0] |= BUZZER_ON; + else + dev->ctl_data->byte[HID_OR0] &= ~BUZZER_ON; + + dev->ctl_data->byte[HID_OR1] = dev->keybit; + dev->ctl_data->byte[HID_OR2] = dev->keybit; + + dev->buzzer_pending = 0; + dev->ctl_urb_pending = 1; + + error = usb_submit_urb(dev->urb_ctl, GFP_ATOMIC); + if (error) + err("%s: usb_submit_urb (urb_ctl) failed %d", + __func__, error); + } + + spin_unlock(&dev->ctl_submit_lock); +} + +static void cm109_urb_ctl_callback(struct urb *urb) +{ + struct cm109_dev *dev = urb->context; + const int status = urb->status; + int error; + +#if CM109_DEBUG + info("### URB CTL: [0x%02x 0x%02x 0x%02x 0x%02x]", + dev->ctl_data->byte[0], + dev->ctl_data->byte[1], + dev->ctl_data->byte[2], + dev->ctl_data->byte[3]); +#endif + + if (status) + err("%s: urb status %d", __func__, status); + + spin_lock(&dev->ctl_submit_lock); + + dev->ctl_urb_pending = 0; + + if (likely(!dev->shutdown)) { + + if (dev->buzzer_pending) { + dev->buzzer_pending = 0; + dev->ctl_urb_pending = 1; + cm109_submit_buzz_toggle(dev); + } else if (likely(!dev->irq_urb_pending)) { + /* ask for key data */ + dev->irq_urb_pending = 1; + error = usb_submit_urb(dev->urb_irq, GFP_ATOMIC); + if (error) + err("%s: usb_submit_urb (urb_irq) failed %d", + __func__, error); + } + } + + spin_unlock(&dev->ctl_submit_lock); +} + +static void cm109_toggle_buzzer_async(struct cm109_dev *dev) +{ + unsigned long flags; + + spin_lock_irqsave(&dev->ctl_submit_lock, flags); + + if (dev->ctl_urb_pending) { + /* URB completion will resubmit */ + dev->buzzer_pending = 1; + } else { + dev->ctl_urb_pending = 1; + cm109_submit_buzz_toggle(dev); + } + + spin_unlock_irqrestore(&dev->ctl_submit_lock, flags); +} + +static void cm109_toggle_buzzer_sync(struct cm109_dev *dev, int on) +{ + int error; + + if (on) + dev->ctl_data->byte[HID_OR0] |= BUZZER_ON; + else + dev->ctl_data->byte[HID_OR0] &= ~BUZZER_ON; + + error = usb_control_msg(dev->udev, + usb_sndctrlpipe(dev->udev, 0), + dev->ctl_req->bRequest, + dev->ctl_req->bRequestType, + le16_to_cpu(dev->ctl_req->wValue), + le16_to_cpu(dev->ctl_req->wIndex), + dev->ctl_data, + USB_PKT_LEN, USB_CTRL_SET_TIMEOUT); + if (error && error != EINTR) + err("%s: usb_control_msg() failed %d", __func__, error); +} + +static void cm109_stop_traffic(struct cm109_dev *dev) +{ + dev->shutdown = 1; + /* + * Make sure other CPUs see this + */ + smp_wmb(); + + usb_kill_urb(dev->urb_ctl); + usb_kill_urb(dev->urb_irq); + + cm109_toggle_buzzer_sync(dev, 0); + + dev->shutdown = 0; + smp_wmb(); +} + +static void cm109_restore_state(struct cm109_dev *dev) +{ + if (dev->open) { + /* + * Restore buzzer state. + * This will also kick regular URB submission + */ + cm109_toggle_buzzer_async(dev); + } +} + +/****************************************************************************** + * input event interface + *****************************************************************************/ + +static int cm109_input_open(struct input_dev *idev) +{ + struct cm109_dev *dev = input_get_drvdata(idev); + int error; + + error = usb_autopm_get_interface(dev->intf); + if (error < 0) { + err("%s - cannot autoresume, result %d", + __func__, error); + return error; + } + + mutex_lock(&dev->pm_mutex); + + dev->buzzer_state = 0; + dev->key_code = -1; /* no keys pressed */ + dev->keybit = 0xf; + + /* issue INIT */ + dev->ctl_data->byte[HID_OR0] = HID_OR_GPO_BUZ_SPDIF; + dev->ctl_data->byte[HID_OR1] = dev->keybit; + dev->ctl_data->byte[HID_OR2] = dev->keybit; + dev->ctl_data->byte[HID_OR3] = 0x00; + + error = usb_submit_urb(dev->urb_ctl, GFP_KERNEL); + if (error) + err("%s: usb_submit_urb (urb_ctl) failed %d", __func__, error); + else + dev->open = 1; + + mutex_unlock(&dev->pm_mutex); + + if (error) + usb_autopm_put_interface(dev->intf); + + return error; +} + +static void cm109_input_close(struct input_dev *idev) +{ + struct cm109_dev *dev = input_get_drvdata(idev); + + mutex_lock(&dev->pm_mutex); + + /* + * Once we are here event delivery is stopped so we + * don't need to worry about someone starting buzzer + * again + */ + cm109_stop_traffic(dev); + dev->open = 0; + + mutex_unlock(&dev->pm_mutex); + + usb_autopm_put_interface(dev->intf); +} + +static int cm109_input_ev(struct input_dev *idev, unsigned int type, + unsigned int code, int value) +{ + struct cm109_dev *dev = input_get_drvdata(idev); + +#if CM109_DEBUG + info("input_ev: type=%u code=%u value=%d", type, code, value); +#endif + + if (type != EV_SND) + return -EINVAL; + + switch (code) { + case SND_TONE: + case SND_BELL: + dev->buzzer_state = !!value; + if (!dev->resetting) + cm109_toggle_buzzer_async(dev); + return 0; + + default: + return -EINVAL; + } +} + + +/****************************************************************************** + * Linux interface and usb initialisation + *****************************************************************************/ + +struct driver_info { + char *name; +}; + +static const struct driver_info info_cm109 = { + .name = "CM109 USB driver", +}; + +enum { + VENDOR_ID = 0x0d8c, /* C-Media Electronics */ + PRODUCT_ID_CM109 = 0x000e, /* CM109 defines range 0x0008 - 0x000f */ +}; + +/* table of devices that work with this driver */ +static const struct usb_device_id cm109_usb_table[] = { + { + .match_flags = USB_DEVICE_ID_MATCH_DEVICE | + USB_DEVICE_ID_MATCH_INT_INFO, + .idVendor = VENDOR_ID, + .idProduct = PRODUCT_ID_CM109, + .bInterfaceClass = USB_CLASS_HID, + .bInterfaceSubClass = 0, + .bInterfaceProtocol = 0, + .driver_info = (kernel_ulong_t) &info_cm109 + }, + /* you can add more devices here with product ID 0x0008 - 0x000f */ + { } +}; + +static void cm109_usb_cleanup(struct cm109_dev *dev) +{ + if (dev->ctl_req) + usb_buffer_free(dev->udev, sizeof(*(dev->ctl_req)), + dev->ctl_req, dev->ctl_req_dma); + if (dev->ctl_data) + usb_buffer_free(dev->udev, USB_PKT_LEN, + dev->ctl_data, dev->ctl_dma); + if (dev->irq_data) + usb_buffer_free(dev->udev, USB_PKT_LEN, + dev->irq_data, dev->irq_dma); + + usb_free_urb(dev->urb_irq); /* parameter validation in core/urb */ + usb_free_urb(dev->urb_ctl); /* parameter validation in core/urb */ + kfree(dev); +} + +static void cm109_usb_disconnect(struct usb_interface *interface) +{ + struct cm109_dev *dev = usb_get_intfdata(interface); + + usb_set_intfdata(interface, NULL); + input_unregister_device(dev->idev); + cm109_usb_cleanup(dev); +} + +static int cm109_usb_probe(struct usb_interface *intf, + const struct usb_device_id *id) +{ + struct usb_device *udev = interface_to_usbdev(intf); + struct driver_info *nfo = (struct driver_info *)id->driver_info; + struct usb_host_interface *interface; + struct usb_endpoint_descriptor *endpoint; + struct cm109_dev *dev; + struct input_dev *input_dev = NULL; + int ret, pipe, i; + int error = -ENOMEM; + + interface = intf->cur_altsetting; + endpoint = &interface->endpoint[0].desc; + + if (!usb_endpoint_is_int_in(endpoint)) + return -ENODEV; + + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return -ENOMEM; + + spin_lock_init(&dev->ctl_submit_lock); + mutex_init(&dev->pm_mutex); + + dev->udev = udev; + dev->intf = intf; + + dev->idev = input_dev = input_allocate_device(); + if (!input_dev) + goto err_out; + + /* allocate usb buffers */ + dev->irq_data = usb_buffer_alloc(udev, USB_PKT_LEN, + GFP_KERNEL, &dev->irq_dma); + if (!dev->irq_data) + goto err_out; + + dev->ctl_data = usb_buffer_alloc(udev, USB_PKT_LEN, + GFP_KERNEL, &dev->ctl_dma); + if (!dev->ctl_data) + goto err_out; + + dev->ctl_req = usb_buffer_alloc(udev, sizeof(*(dev->ctl_req)), + GFP_KERNEL, &dev->ctl_req_dma); + if (!dev->ctl_req) + goto err_out; + + /* allocate urb structures */ + dev->urb_irq = usb_alloc_urb(0, GFP_KERNEL); + if (!dev->urb_irq) + goto err_out; + + dev->urb_ctl = usb_alloc_urb(0, GFP_KERNEL); + if (!dev->urb_ctl) + goto err_out; + + /* get a handle to the interrupt data pipe */ + pipe = usb_rcvintpipe(udev, endpoint->bEndpointAddress); + ret = usb_maxpacket(udev, pipe, usb_pipeout(pipe)); + if (ret != USB_PKT_LEN) + err("invalid payload size %d, expected %d", ret, USB_PKT_LEN); + + /* initialise irq urb */ + usb_fill_int_urb(dev->urb_irq, udev, pipe, dev->irq_data, + USB_PKT_LEN, + cm109_urb_irq_callback, dev, endpoint->bInterval); + dev->urb_irq->transfer_dma = dev->irq_dma; + dev->urb_irq->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; + dev->urb_irq->dev = udev; + + /* initialise ctl urb */ + dev->ctl_req->bRequestType = USB_TYPE_CLASS | USB_RECIP_INTERFACE | + USB_DIR_OUT; + dev->ctl_req->bRequest = USB_REQ_SET_CONFIGURATION; + dev->ctl_req->wValue = cpu_to_le16(0x200); + dev->ctl_req->wIndex = cpu_to_le16(interface->desc.bInterfaceNumber); + dev->ctl_req->wLength = cpu_to_le16(USB_PKT_LEN); + + usb_fill_control_urb(dev->urb_ctl, udev, usb_sndctrlpipe(udev, 0), + (void *)dev->ctl_req, dev->ctl_data, USB_PKT_LEN, + cm109_urb_ctl_callback, dev); + dev->urb_ctl->setup_dma = dev->ctl_req_dma; + dev->urb_ctl->transfer_dma = dev->ctl_dma; + dev->urb_ctl->transfer_flags |= URB_NO_SETUP_DMA_MAP | + URB_NO_TRANSFER_DMA_MAP; + dev->urb_ctl->dev = udev; + + /* find out the physical bus location */ + usb_make_path(udev, dev->phys, sizeof(dev->phys)); + strlcat(dev->phys, "/input0", sizeof(dev->phys)); + + /* register settings for the input device */ + input_dev->name = nfo->name; + input_dev->phys = dev->phys; + usb_to_input_id(udev, &input_dev->id); + input_dev->dev.parent = &intf->dev; + + input_set_drvdata(input_dev, dev); + input_dev->open = cm109_input_open; + input_dev->close = cm109_input_close; + input_dev->event = cm109_input_ev; + + input_dev->keycode = dev->keymap; + input_dev->keycodesize = sizeof(unsigned char); + input_dev->keycodemax = ARRAY_SIZE(dev->keymap); + + input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_SND); + input_dev->sndbit[0] = BIT_MASK(SND_BELL) | BIT_MASK(SND_TONE); + + /* register available key events */ + for (i = 0; i < KEYMAP_SIZE; i++) { + unsigned short k = keymap(i); + dev->keymap[i] = k; + __set_bit(k, input_dev->keybit); + } + __clear_bit(KEY_RESERVED, input_dev->keybit); + + error = input_register_device(dev->idev); + if (error) + goto err_out; + + usb_set_intfdata(intf, dev); + + return 0; + + err_out: + input_free_device(input_dev); + cm109_usb_cleanup(dev); + return error; +} + +static int cm109_usb_suspend(struct usb_interface *intf, pm_message_t message) +{ + struct cm109_dev *dev = usb_get_intfdata(intf); + + info("cm109: usb_suspend (event=%d)", message.event); + + mutex_lock(&dev->pm_mutex); + cm109_stop_traffic(dev); + mutex_unlock(&dev->pm_mutex); + + return 0; +} + +static int cm109_usb_resume(struct usb_interface *intf) +{ + struct cm109_dev *dev = usb_get_intfdata(intf); + + info("cm109: usb_resume"); + + mutex_lock(&dev->pm_mutex); + cm109_restore_state(dev); + mutex_unlock(&dev->pm_mutex); + + return 0; +} + +static int cm109_usb_pre_reset(struct usb_interface *intf) +{ + struct cm109_dev *dev = usb_get_intfdata(intf); + + mutex_lock(&dev->pm_mutex); + + /* + * Make sure input events don't try to toggle buzzer + * while we are resetting + */ + dev->resetting = 1; + smp_wmb(); + + cm109_stop_traffic(dev); + + return 0; +} + +static int cm109_usb_post_reset(struct usb_interface *intf) +{ + struct cm109_dev *dev = usb_get_intfdata(intf); + + dev->resetting = 0; + smp_wmb(); + + cm109_restore_state(dev); + + mutex_unlock(&dev->pm_mutex); + + return 0; +} + +static struct usb_driver cm109_driver = { + .name = "cm109", + .probe = cm109_usb_probe, + .disconnect = cm109_usb_disconnect, + .suspend = cm109_usb_suspend, + .resume = cm109_usb_resume, + .reset_resume = cm109_usb_resume, + .pre_reset = cm109_usb_pre_reset, + .post_reset = cm109_usb_post_reset, + .id_table = cm109_usb_table, + .supports_autosuspend = 1, +}; + +static int __init cm109_select_keymap(void) +{ + /* Load the phone keymap */ + if (!strcasecmp(phone, "kip1000")) { + keymap = keymap_kip1000; + info("Keymap for Komunikate KIP1000 phone loaded"); + } else if (!strcasecmp(phone, "gtalk")) { + keymap = keymap_gtalk; + info("Keymap for Genius G-talk phone loaded"); + } else if (!strcasecmp(phone, "usbph01")) { + keymap = keymap_usbph01; + info("Keymap for Allied-Telesis Corega USBPH01 phone loaded"); + } else { + err("Unsupported phone: %s", phone); + return -EINVAL; + } + + return 0; +} + +static int __init cm109_init(void) +{ + int err; + + err = cm109_select_keymap(); + if (err) + return err; + + err = usb_register(&cm109_driver); + if (err) + return err; + + info(DRIVER_DESC ": " DRIVER_VERSION " (C) " DRIVER_AUTHOR); + + return 0; +} + +static void __exit cm109_exit(void) +{ + usb_deregister(&cm109_driver); +} + +module_init(cm109_init); +module_exit(cm109_exit); + +MODULE_DEVICE_TABLE(usb, cm109_usb_table); + +MODULE_AUTHOR(DRIVER_AUTHOR); +MODULE_DESCRIPTION(DRIVER_DESC); +MODULE_LICENSE("GPL"); -- cgit v0.10.2 From 34a7c48c221676ff8322ca4b8ded84eada34cf12 Mon Sep 17 00:00:00 2001 From: Remi Herilier Date: Fri, 8 Aug 2008 12:13:13 -0400 Subject: Input: wistron - add support for Fujitsu-Siemens Amilo Pro v3505 Wistron button support for Fujitsu-Siemens Amilo Pro Edition V3505. Signed-off-by: Remi Herilier Signed-off-by: Andrew Morton Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/misc/wistron_btns.c b/drivers/input/misc/wistron_btns.c index fe268be..7c8957d 100644 --- a/drivers/input/misc/wistron_btns.c +++ b/drivers/input/misc/wistron_btns.c @@ -277,6 +277,16 @@ static struct key_entry keymap_fs_amilo_pro_v2000[] __initdata = { { KE_END, 0 } }; +static struct key_entry keymap_fs_amilo_pro_v3505[] __initdata = { + { KE_KEY, 0x01, {KEY_HELP} }, /* Fn+F1 */ + { KE_KEY, 0x06, {KEY_DISPLAYTOGGLE} }, /* Fn+F4 */ + { KE_BLUETOOTH, 0x30 }, /* Fn+F10 */ + { KE_KEY, 0x31, {KEY_MAIL} }, /* mail button */ + { KE_KEY, 0x36, {KEY_WWW} }, /* www button */ + { KE_WIFI, 0x78 }, /* satelite dish button */ + { KE_END, 0 } +}; + static struct key_entry keymap_fujitsu_n3510[] __initdata = { { KE_KEY, 0x11, {KEY_PROG1} }, { KE_KEY, 0x12, {KEY_PROG2} }, @@ -618,6 +628,15 @@ static struct dmi_system_id dmi_ids[] __initdata = { }, { .callback = dmi_matched, + .ident = "Fujitsu-Siemens Amilo Pro Edition V3505", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"), + DMI_MATCH(DMI_PRODUCT_NAME, "AMILO Pro Edition V3505"), + }, + .driver_data = keymap_fs_amilo_pro_v3505 + }, + { + .callback = dmi_matched, .ident = "Fujitsu-Siemens Amilo M7400", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"), -- cgit v0.10.2 From 57ffe9d539e0eb741bb9ca8f2834d210e70ee2e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 8 Aug 2008 12:14:34 -0400 Subject: Input: gpio-keys - optimize interrupt handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit By passing a gpio_button_data structure to the handler instead of the whole platform_device the search for the right button can go away. Signed-off-by: Uwe Kleine-König Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/keyboard/gpio_keys.c b/drivers/input/keyboard/gpio_keys.c index be58730..e2809d2 100644 --- a/drivers/input/keyboard/gpio_keys.c +++ b/drivers/input/keyboard/gpio_keys.c @@ -56,29 +56,18 @@ static void gpio_check_button(unsigned long _data) static irqreturn_t gpio_keys_isr(int irq, void *dev_id) { - struct platform_device *pdev = dev_id; - struct gpio_keys_platform_data *pdata = pdev->dev.platform_data; - struct gpio_keys_drvdata *ddata = platform_get_drvdata(pdev); - int i; + struct gpio_button_data *bdata = dev_id; + struct gpio_keys_button *button = bdata->button; - for (i = 0; i < pdata->nbuttons; i++) { - struct gpio_keys_button *button = &pdata->buttons[i]; + BUG_ON(irq != gpio_to_irq(button->gpio)); - if (irq == gpio_to_irq(button->gpio)) { - struct gpio_button_data *bdata = &ddata->data[i]; - - if (button->debounce_interval) - mod_timer(&bdata->timer, - jiffies + - msecs_to_jiffies(button->debounce_interval)); - else - gpio_keys_report_event(button, bdata->input); - - return IRQ_HANDLED; - } - } + if (button->debounce_interval) + mod_timer(&bdata->timer, + jiffies + msecs_to_jiffies(button->debounce_interval)); + else + gpio_keys_report_event(button, bdata->input); - return IRQ_NONE; + return IRQ_HANDLED; } static int __devinit gpio_keys_probe(struct platform_device *pdev) @@ -151,7 +140,7 @@ static int __devinit gpio_keys_probe(struct platform_device *pdev) IRQF_SAMPLE_RANDOM | IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING, button->desc ? button->desc : "gpio_keys", - pdev); + bdata); if (error) { pr_err("gpio-keys: Unable to claim irq %d; error %d\n", irq, error); @@ -178,7 +167,7 @@ static int __devinit gpio_keys_probe(struct platform_device *pdev) fail2: while (--i >= 0) { - free_irq(gpio_to_irq(pdata->buttons[i].gpio), pdev); + free_irq(gpio_to_irq(pdata->buttons[i].gpio), &ddata->data[i]); if (pdata->buttons[i].debounce_interval) del_timer_sync(&ddata->data[i].timer); gpio_free(pdata->buttons[i].gpio); @@ -203,7 +192,7 @@ static int __devexit gpio_keys_remove(struct platform_device *pdev) for (i = 0; i < pdata->nbuttons; i++) { int irq = gpio_to_irq(pdata->buttons[i].gpio); - free_irq(irq, pdev); + free_irq(irq, &ddata->data[i]); if (pdata->buttons[i].debounce_interval) del_timer_sync(&ddata->data[i].timer); gpio_free(pdata->buttons[i].gpio); -- cgit v0.10.2 From ce25d7e90c7543f0046c3bdcdcc7594546c57dcc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 8 Aug 2008 12:14:36 -0400 Subject: Input: gpio-keys - simplify argument list for report_event MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For now this only saves a few instructions (for gpio_keys_report_event, gpio_keys_isr and gpio_check_button one instraction each on ARM using arm-linux-gnu-gcc 4.2.3---I assume this is similar for other arch/compiler combinations). Signed-off-by: Uwe Kleine-König Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/keyboard/gpio_keys.c b/drivers/input/keyboard/gpio_keys.c index e2809d2..fe22ca3 100644 --- a/drivers/input/keyboard/gpio_keys.c +++ b/drivers/input/keyboard/gpio_keys.c @@ -37,9 +37,10 @@ struct gpio_keys_drvdata { struct gpio_button_data data[0]; }; -static void gpio_keys_report_event(struct gpio_keys_button *button, - struct input_dev *input) +static void gpio_keys_report_event(struct gpio_button_data *bdata) { + struct gpio_keys_button *button = bdata->button; + struct input_dev *input = bdata->input; unsigned int type = button->type ?: EV_KEY; int state = (gpio_get_value(button->gpio) ? 1 : 0) ^ button->active_low; @@ -51,7 +52,7 @@ static void gpio_check_button(unsigned long _data) { struct gpio_button_data *data = (struct gpio_button_data *)_data; - gpio_keys_report_event(data->button, data->input); + gpio_keys_report_event(data); } static irqreturn_t gpio_keys_isr(int irq, void *dev_id) @@ -65,7 +66,7 @@ static irqreturn_t gpio_keys_isr(int irq, void *dev_id) mod_timer(&bdata->timer, jiffies + msecs_to_jiffies(button->debounce_interval)); else - gpio_keys_report_event(button, bdata->input); + gpio_keys_report_event(bdata); return IRQ_HANDLED; } -- cgit v0.10.2 From d83d213d9fda671dfd84ea81182742f9e329a6b4 Mon Sep 17 00:00:00 2001 From: Sven Anders Date: Fri, 8 Aug 2008 16:31:31 -0400 Subject: Input: appletouch - prepare for geyser 3/4 handling Split complete function into separate functions for GEYSER1/2 and GEYSER 3/4. Signed-off-by: Sven Anders Signed-off-by: Johannes Berg Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/mouse/appletouch.c b/drivers/input/mouse/appletouch.c index 1f41ae9..36ebe5c 100644 --- a/drivers/input/mouse/appletouch.c +++ b/drivers/input/mouse/appletouch.c @@ -327,11 +327,14 @@ static inline void atp_report_fingers(struct input_dev *input, int fingers) input_report_key(input, BTN_TOOL_TRIPLETAP, fingers > 2); } -static void atp_complete(struct urb *urb) +/* Check URB status and for correct length of data package */ + +#define ATP_URB_STATUS_SUCCESS 0 +#define ATP_URB_STATUS_ERROR 1 +#define ATP_URB_STATUS_ERROR_FATAL 2 + +static int atp_status_check(struct urb *urb) { - int x, y, x_z, y_z, x_f, y_f; - int retval, i, j; - int key; struct atp *dev = urb->context; switch (urb->status) { @@ -351,11 +354,12 @@ static void atp_complete(struct urb *urb) /* This urb is terminated, clean up */ dbg("atp_complete: urb shutting down with status: %d", urb->status); - return; + return ATP_URB_STATUS_ERROR_FATAL; + default: dbg("atp_complete: nonzero urb status received: %d", urb->status); - goto exit; + return ATP_URB_STATUS_ERROR; } /* drop incomplete datasets */ @@ -363,30 +367,33 @@ static void atp_complete(struct urb *urb) dprintk("appletouch: incomplete data package" " (first byte: %d, length: %d).\n", dev->data[0], dev->urb->actual_length); - goto exit; + return ATP_URB_STATUS_ERROR; } - /* reorder the sensors values */ - if (dev->type == ATP_GEYSER3 || dev->type == ATP_GEYSER4) { - memset(dev->xy_cur, 0, sizeof(dev->xy_cur)); + return ATP_URB_STATUS_SUCCESS; +} - /* - * The values are laid out like this: - * -, Y1, Y2, -, Y3, Y4, -, ..., -, X1, X2, -, X3, X4, ... - * '-' is an unused value. - */ +/* + * USB interrupt callback functions + */ - /* read X values */ - for (i = 0, j = 19; i < 20; i += 2, j += 3) { - dev->xy_cur[i] = dev->data[j + 1]; - dev->xy_cur[i + 1] = dev->data[j + 2]; - } - /* read Y values */ - for (i = 0, j = 1; i < 9; i += 2, j += 3) { - dev->xy_cur[ATP_XSENSORS + i] = dev->data[j + 1]; - dev->xy_cur[ATP_XSENSORS + i + 1] = dev->data[j + 2]; - } - } else if (dev->type == ATP_GEYSER2) { +/* Interrupt function for older touchpads: FOUNTAIN/GEYSER1/GEYSER2 */ + +static void atp_complete_geyser_1_2(struct urb *urb) +{ + int x, y, x_z, y_z, x_f, y_f; + int retval, i, j; + int key; + struct atp *dev = urb->context; + int status = atp_status_check(urb); + + if (status == ATP_URB_STATUS_ERROR_FATAL) + return; + else if (status == ATP_URB_STATUS_ERROR) + goto exit; + + /* reorder the sensors values */ + if (dev->type == ATP_GEYSER2) { memset(dev->xy_cur, 0, sizeof(dev->xy_cur)); /* @@ -427,33 +434,146 @@ static void atp_complete(struct urb *urb) /* first sample */ dev->valid = true; dev->x_old = dev->y_old = -1; + + /* Store first sample */ memcpy(dev->xy_old, dev->xy_cur, sizeof(dev->xy_old)); - if (dev->size_detect_done || - dev->type == ATP_GEYSER3) /* No 17" Macbooks (yet) */ + /* Perform size detection, if not done already */ + if (!dev->size_detect_done) { + + /* 17" Powerbooks have extra X sensors */ + for (i = (dev->type == ATP_GEYSER2 ? 15 : 16); + i < ATP_XSENSORS; i++) { + if (!dev->xy_cur[i]) + continue; + + printk(KERN_INFO + "appletouch: 17\" model detected.\n"); + + if (dev->type == ATP_GEYSER2) + input_set_abs_params(dev->input, ABS_X, + 0, + (20 - 1) * + ATP_XFACT - 1, + ATP_FUZZ, 0); + else + input_set_abs_params(dev->input, ABS_X, + 0, + (26 - 1) * + ATP_XFACT - 1, + ATP_FUZZ, 0); + break; + } + + dev->size_detect_done = 1; goto exit; + } + } - /* 17" Powerbooks have extra X sensors */ - for (i = (dev->type == ATP_GEYSER2 ? 15 : 16); - i < ATP_XSENSORS; i++) { - if (!dev->xy_cur[i]) - continue; - - printk(KERN_INFO "appletouch: 17\" model detected.\n"); - if (dev->type == ATP_GEYSER2) - input_set_abs_params(dev->input, ABS_X, 0, - (20 - 1) * - ATP_XFACT - 1, - ATP_FUZZ, 0); - else - input_set_abs_params(dev->input, ABS_X, 0, - (ATP_XSENSORS - 1) * - ATP_XFACT - 1, - ATP_FUZZ, 0); - break; + for (i = 0; i < ATP_XSENSORS + ATP_YSENSORS; i++) { + /* accumulate the change */ + signed char change = dev->xy_old[i] - dev->xy_cur[i]; + dev->xy_acc[i] -= change; + + /* prevent down drifting */ + if (dev->xy_acc[i] < 0) + dev->xy_acc[i] = 0; + } + + memcpy(dev->xy_old, dev->xy_cur, sizeof(dev->xy_old)); + + dbg_dump("accumulator", dev->xy_acc); + + x = atp_calculate_abs(dev->xy_acc, ATP_XSENSORS, + ATP_XFACT, &x_z, &x_f); + y = atp_calculate_abs(dev->xy_acc + ATP_XSENSORS, ATP_YSENSORS, + ATP_YFACT, &y_z, &y_f); + key = dev->data[dev->datalen - 1] & 1; + + if (x && y) { + if (dev->x_old != -1) { + x = (dev->x_old * 3 + x) >> 2; + y = (dev->y_old * 3 + y) >> 2; + dev->x_old = x; + dev->y_old = y; + + if (debug > 1) + printk(KERN_DEBUG "appletouch: " + "X: %3d Y: %3d Xz: %3d Yz: %3d\n", + x, y, x_z, y_z); + + input_report_key(dev->input, BTN_TOUCH, 1); + input_report_abs(dev->input, ABS_X, x); + input_report_abs(dev->input, ABS_Y, y); + input_report_abs(dev->input, ABS_PRESSURE, + min(ATP_PRESSURE, x_z + y_z)); + atp_report_fingers(dev->input, max(x_f, y_f)); } + dev->x_old = x; + dev->y_old = y; + + } else if (!x && !y) { + + dev->x_old = dev->y_old = -1; + input_report_key(dev->input, BTN_TOUCH, 0); + input_report_abs(dev->input, ABS_PRESSURE, 0); + atp_report_fingers(dev->input, 0); + + /* reset the accumulator on release */ + memset(dev->xy_acc, 0, sizeof(dev->xy_acc)); + } + + input_report_key(dev->input, BTN_LEFT, key); + input_sync(dev->input); + + exit: + retval = usb_submit_urb(dev->urb, GFP_ATOMIC); + if (retval) + err("atp_complete: usb_submit_urb failed with result %d", + retval); +} + +/* Interrupt function for older touchpads: GEYSER3/GEYSER4 */ + +static void atp_complete_geyser_3_4(struct urb *urb) +{ + int x, y, x_z, y_z, x_f, y_f; + int retval, i, j; + int key; + struct atp *dev = urb->context; + int status = atp_status_check(urb); + + if (status == ATP_URB_STATUS_ERROR_FATAL) + return; + else if (status == ATP_URB_STATUS_ERROR) + goto exit; + + /* Reorder the sensors values: + * + * The values are laid out like this: + * -, Y1, Y2, -, Y3, Y4, -, ..., -, X1, X2, -, X3, X4, ... + * '-' is an unused value. + */ + + /* read X values */ + for (i = 0, j = 19; i < 20; i += 2, j += 3) { + dev->xy_cur[i] = dev->data[j + 1]; + dev->xy_cur[i + 1] = dev->data[j + 2]; + } + /* read Y values */ + for (i = 0, j = 1; i < 9; i += 2, j += 3) { + dev->xy_cur[ATP_XSENSORS + i] = dev->data[j + 1]; + dev->xy_cur[ATP_XSENSORS + i + 1] = dev->data[j + 2]; + } + + dbg_dump("sample", dev->xy_cur); + + if (!dev->valid) { + /* first sample */ + dev->valid = true; + dev->x_old = dev->y_old = -1; + memcpy(dev->xy_old, dev->xy_cur, sizeof(dev->xy_old)); - dev->size_detect_done = 1; goto exit; } @@ -514,28 +634,26 @@ static void atp_complete(struct urb *urb) input_sync(dev->input); /* - * Many Geysers will continue to send packets continually after + * Geysers 3/4 will continue to send packets continually after * the first touch unless reinitialised. Do so if it's been * idle for a while in order to avoid waking the kernel up - * several hundred times a second. Re-initialization does not - * work on Fountain touchpads. + * several hundred times a second. */ - if (dev->type != ATP_FOUNTAIN) { - /* - * Button must not be pressed when entering suspend, - * otherwise we will never release the button. - */ - if (!x && !y && !key) { - dev->idlecount++; - if (dev->idlecount == 10) { - dev->valid = false; - schedule_work(&dev->work); - /* Don't resubmit urb here, wait for reinit */ - return; - } - } else - dev->idlecount = 0; - } + + /* + * Button must not be pressed when entering suspend, + * otherwise we will never release the button. + */ + if (!x && !y && !key) { + dev->idlecount++; + if (dev->idlecount == 10) { + dev->valid = false; + schedule_work(&dev->work); + /* Don't resubmit urb here, wait for reinit */ + return; + } + } else + dev->idlecount = 0; exit: retval = usb_submit_urb(dev->urb, GFP_ATOMIC); @@ -632,9 +750,19 @@ static int atp_probe(struct usb_interface *iface, if (!dev->data) goto err_free_urb; - usb_fill_int_urb(dev->urb, udev, - usb_rcvintpipe(udev, int_in_endpointAddr), - dev->data, dev->datalen, atp_complete, dev, 1); + /* Select the USB complete (callback) function */ + if (dev->type == ATP_FOUNTAIN || + dev->type == ATP_GEYSER1 || + dev->type == ATP_GEYSER2) + usb_fill_int_urb(dev->urb, udev, + usb_rcvintpipe(udev, int_in_endpointAddr), + dev->data, dev->datalen, + atp_complete_geyser_1_2, dev, 1); + else + usb_fill_int_urb(dev->urb, udev, + usb_rcvintpipe(udev, int_in_endpointAddr), + dev->data, dev->datalen, + atp_complete_geyser_3_4, dev, 1); error = atp_handle_geyser(dev); if (error) -- cgit v0.10.2 From 82a196f481661170b4982dc7e68a12e9253309d0 Mon Sep 17 00:00:00 2001 From: Sven Anders Date: Fri, 8 Aug 2008 16:31:33 -0400 Subject: Input: appletouch - handle geyser 3/4 status bits Implement support for status bits on Geyser 3/4. Signed-off-by: Sven Anders Signed-off-by: Johannes Berg Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/mouse/appletouch.c b/drivers/input/mouse/appletouch.c index 36ebe5c..079816e 100644 --- a/drivers/input/mouse/appletouch.c +++ b/drivers/input/mouse/appletouch.c @@ -136,12 +136,28 @@ MODULE_DEVICE_TABLE(usb, atp_table); #define ATP_GEYSER_MODE_REQUEST_INDEX 0 #define ATP_GEYSER_MODE_VENDOR_VALUE 0x04 +/** + * enum atp_status_bits - status bit meanings + * + * These constants represent the meaning of the status bits. + * (only Geyser 3/4) + * + * @ATP_STATUS_BUTTON: The button was pressed + * @ATP_STATUS_BASE_UPDATE: Update of the base values (untouched pad) + * @ATP_STATUS_FROM_RESET: Reset previously performed + */ +enum atp_status_bits { + ATP_STATUS_BUTTON = BIT(0), + ATP_STATUS_BASE_UPDATE = BIT(2), + ATP_STATUS_FROM_RESET = BIT(4), +}; + /* Structure to hold all of our device specific stuff */ struct atp { char phys[64]; struct usb_device *udev; /* usb device */ struct urb *urb; /* usb request block */ - signed char *data; /* transferred data */ + u8 *data; /* transferred data */ struct input_dev *input; /* input dev */ enum atp_touchpad_type type; /* type of touchpad */ bool open; @@ -251,8 +267,6 @@ static void atp_reinit(struct work_struct *work) int retval; dprintk("appletouch: putting appletouch to sleep (reinit)\n"); - dev->idlecount = 0; - atp_geyser_init(udev); retval = usb_submit_urb(dev->urb, GFP_ATOMIC); @@ -488,7 +502,7 @@ static void atp_complete_geyser_1_2(struct urb *urb) ATP_XFACT, &x_z, &x_f); y = atp_calculate_abs(dev->xy_acc + ATP_XSENSORS, ATP_YSENSORS, ATP_YFACT, &y_z, &y_f); - key = dev->data[dev->datalen - 1] & 1; + key = dev->data[dev->datalen - 1] & ATP_STATUS_BUTTON; if (x && y) { if (dev->x_old != -1) { @@ -568,34 +582,38 @@ static void atp_complete_geyser_3_4(struct urb *urb) dbg_dump("sample", dev->xy_cur); - if (!dev->valid) { - /* first sample */ - dev->valid = true; - dev->x_old = dev->y_old = -1; - memcpy(dev->xy_old, dev->xy_cur, sizeof(dev->xy_old)); + /* Just update the base values (i.e. touchpad in untouched state) */ + if (dev->data[dev->datalen - 1] & ATP_STATUS_BASE_UPDATE) { + dprintk(KERN_DEBUG "appletouch: updated base values\n"); + + memcpy(dev->xy_old, dev->xy_cur, sizeof(dev->xy_old)); goto exit; } for (i = 0; i < ATP_XSENSORS + ATP_YSENSORS; i++) { - /* accumulate the change */ - signed char change = dev->xy_old[i] - dev->xy_cur[i]; - dev->xy_acc[i] -= change; + /* calculate the change */ + dev->xy_acc[i] = dev->xy_cur[i] - dev->xy_old[i]; + + /* this is a round-robin value, so couple with that */ + if (dev->xy_acc[i] > 127) + dev->xy_acc[i] -= 256; + + if (dev->xy_acc[i] < -127) + dev->xy_acc[i] += 256; /* prevent down drifting */ if (dev->xy_acc[i] < 0) dev->xy_acc[i] = 0; } - memcpy(dev->xy_old, dev->xy_cur, sizeof(dev->xy_old)); - dbg_dump("accumulator", dev->xy_acc); x = atp_calculate_abs(dev->xy_acc, ATP_XSENSORS, ATP_XFACT, &x_z, &x_f); y = atp_calculate_abs(dev->xy_acc + ATP_XSENSORS, ATP_YSENSORS, ATP_YFACT, &y_z, &y_f); - key = dev->data[dev->datalen - 1] & 1; + key = dev->data[dev->datalen - 1] & ATP_STATUS_BUTTON; if (x && y) { if (dev->x_old != -1) { @@ -647,7 +665,8 @@ static void atp_complete_geyser_3_4(struct urb *urb) if (!x && !y && !key) { dev->idlecount++; if (dev->idlecount == 10) { - dev->valid = false; + dev->x_old = dev->y_old = -1; + dev->idlecount = 0; schedule_work(&dev->work); /* Don't resubmit urb here, wait for reinit */ return; @@ -879,8 +898,6 @@ static int atp_suspend(struct usb_interface *iface, pm_message_t message) struct atp *dev = usb_get_intfdata(iface); usb_kill_urb(dev->urb); - dev->valid = false; - return 0; } -- cgit v0.10.2 From 90936cfe6c8f7e90a6f8b0c5cb44d3a012dfd313 Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Mon, 11 Aug 2008 00:07:44 +0200 Subject: x86, tsc: fix section mismatch warning WARNING: vmlinux.o(.text+0x7950): Section mismatch in reference from the function native_calibrate_tsc() to the function .init.text:tsc_read_refs() The function native_calibrate_tsc() references the function __init tsc_read_refs(). This is often because native_calibrate_tsc lacks a __init annotation or the annotation of tsc_read_refs is wrong. tsc_read_refs is called from native_calibrate_tsc which is not __init and native_calibrate_tsc cannot be marked __init Signed-off-by: Marcin Slusarz Cc: Thomas Gleixner Cc: Ingo Molnar Cc: H. Peter Anvin Signed-off-by: H. Peter Anvin diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 7603c05..46af716 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -104,7 +104,7 @@ __setup("notsc", notsc_setup); /* * Read TSC and the reference counters. Take care of SMI disturbance */ -static u64 __init tsc_read_refs(u64 *pm, u64 *hpet) +static u64 tsc_read_refs(u64 *pm, u64 *hpet) { u64 t1, t2; int i; -- cgit v0.10.2 From 85a14437ed24244c78f9a70d58b8299753b03c92 Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Mon, 11 Aug 2008 00:12:37 +0200 Subject: x86: fix MP_processor_info section mismatch warning WARNING: arch/x86/kernel/built-in.o(.cpuinit.text+0x1fe7): Section mismatch in reference from the function MP_processor_info() to the variable .init.data:x86_quirks The function __cpuinit MP_processor_info() references a variable __initdata x86_quirks. If x86_quirks is only used by MP_processor_info then annotate x86_quirks with a matching annotation. MP_processor_info uses x86_quirks which is __init and is used only from smp_read_mpc and construct_default_ISA_mptable which are __init Signed-off-by: Marcin Slusarz Cc: Thomas Gleixner Cc: Ingo Molnar Cc: H. Peter Anvin Signed-off-by: H. Peter Anvin diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 6ae005c..78509ee 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -49,7 +49,7 @@ static int __init mpf_checksum(unsigned char *mp, int len) return sum & 0xFF; } -static void __cpuinit MP_processor_info(struct mpc_config_processor *m) +static void __init MP_processor_info(struct mpc_config_processor *m) { int apicid; char *bootup_cpu = ""; -- cgit v0.10.2 From bafc1dae8215c862c2e6ae913ddadc20581e59b9 Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Mon, 11 Aug 2008 00:11:13 +0200 Subject: x86: mmconf: fix section mismatch warning WARNING: arch/x86/kernel/built-in.o(.cpuinit.text+0x1591): Section mismatch in reference from the function init_amd() to the function .init.text:check_enable_amd_mmconf_dmi() The function __cpuinit init_amd() references a function __init check_enable_amd_mmconf_dmi(). If check_enable_amd_mmconf_dmi is only used by init_amd then annotate check_enable_amd_mmconf_dmi with a matching annotation. check_enable_amd_mmconf_dmi is only called from init_amd which is __cpuinit Signed-off-by: Marcin Slusarz Cc: Thomas Gleixner Cc: Ingo Molnar Cc: H. Peter Anvin Signed-off-by: H. Peter Anvin diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c index fdfdc55..efc2f36 100644 --- a/arch/x86/kernel/mmconf-fam10h_64.c +++ b/arch/x86/kernel/mmconf-fam10h_64.c @@ -238,7 +238,7 @@ static struct dmi_system_id __devinitdata mmconf_dmi_table[] = { {} }; -void __init check_enable_amd_mmconf_dmi(void) +void __cpuinit check_enable_amd_mmconf_dmi(void) { dmi_check_system(mmconf_dmi_table); } diff --git a/include/asm-x86/mmconfig.h b/include/asm-x86/mmconfig.h index 95beda0..e293ab8 100644 --- a/include/asm-x86/mmconfig.h +++ b/include/asm-x86/mmconfig.h @@ -3,7 +3,7 @@ #ifdef CONFIG_PCI_MMCONFIG extern void __cpuinit fam10h_check_enable_mmcfg(void); -extern void __init check_enable_amd_mmconf_dmi(void); +extern void __cpuinit check_enable_amd_mmconf_dmi(void); #else static inline void fam10h_check_enable_mmcfg(void) { } static inline void check_enable_amd_mmconf_dmi(void) { } -- cgit v0.10.2 From d406d21d90dce2e66c7eb4a44605aac947fe55fb Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Mon, 11 Aug 2008 00:09:38 +0200 Subject: x86: mpparse.c: fix section mismatch warning WARNING: vmlinux.o(.text+0x118f7): Section mismatch in reference from the function construct_ioapic_table() to the function .init.text:MP_bus_info() The function construct_ioapic_table() references the function __init MP_bus_info(). This is often because construct_ioapic_table lacks a __init annotation or the annotation of MP_bus_info is wrong. construct_ioapic_table is called only from construct_default_ISA_mptable which is __init Signed-off-by: Marcin Slusarz Cc: Thomas Gleixner Cc: Ingo Molnar Cc: H. Peter Anvin Signed-off-by: H. Peter Anvin diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 78509ee..2c1963b 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -484,7 +484,7 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type) } -static void construct_ioapic_table(int mpc_default_type) +static void __init construct_ioapic_table(int mpc_default_type) { struct mpc_config_ioapic ioapic; struct mpc_config_bus bus; @@ -529,7 +529,7 @@ static void construct_ioapic_table(int mpc_default_type) construct_default_ioirq_mptable(mpc_default_type); } #else -static inline void construct_ioapic_table(int mpc_default_type) { } +static inline void __init construct_ioapic_table(int mpc_default_type) { } #endif static inline void __init construct_default_ISA_mptable(int mpc_default_type) -- cgit v0.10.2 From 4c942654a4514d7d0a9b592a7d1b198a212e8a03 Mon Sep 17 00:00:00 2001 From: Huang Weiyi Date: Sun, 10 Aug 2008 20:57:45 +0800 Subject: arch/x86/kernel/acpi/boot.c: removed duplicated #include Removed duplicated include file in arch/x86/kernel/acpi/boot.c. Signed-off-by: Huang Weiyi Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index fa88a1d..d8d1189 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -58,7 +58,6 @@ EXPORT_SYMBOL(acpi_disabled); #ifdef CONFIG_X86_64 #include -#include #else /* X86 */ -- cgit v0.10.2 From 8aeb4022633f7d0eca5e13a9622bd73df92bbf2a Mon Sep 17 00:00:00 2001 From: Huang Weiyi Date: Sun, 10 Aug 2008 21:09:22 +0800 Subject: arch/x86/kernel/cpuid.c: removed duplicated #include Removed duplicated include file in arch/x86/kernel/cpuid.c. Signed-off-by: Huang Weiyi Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 14b11b3..3fa4e92 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -36,7 +36,6 @@ #include #include #include -#include #include #include #include -- cgit v0.10.2 From 67182ae1c42206e516f7efb292b745e826497b24 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 10 Aug 2008 18:35:38 -0700 Subject: rcu, debug: detect stalled grace periods this is a diagnostic patch for Classic RCU. The approach is to record a timestamp at the beginning of the grace period (in rcu_start_batch()), then have rcu_check_callbacks() complain if: 1. it is running on a CPU that has holding up grace periods for a long time (say one second). This will identify the culprit assuming that the culprit has not disabled hardware irqs, instruction execution, or some such. 2. it is running on a CPU that is not holding up grace periods, but grace periods have been held up for an even longer time (say two seconds). It is enabled via the default-off CONFIG_DEBUG_RCU_STALL kernel parameter. Rather than exponential backoff, it backs off to once per 30 seconds. My feeling upon thinking on it was that if you have stalled RCU grace periods for that long, a few extra printk() messages are probably the least of your worries... Signed-off-by: Paul E. McKenney Cc: Peter Zijlstra Cc: Yinghai Lu Cc: David Witbrodt Signed-off-by: Ingo Molnar diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h index 04c7281..1658995 100644 --- a/include/linux/rcuclassic.h +++ b/include/linux/rcuclassic.h @@ -46,6 +46,9 @@ struct rcu_ctrlblk { long cur; /* Current batch number. */ long completed; /* Number of the last completed batch */ long pending; /* Number of the last pending batch */ +#ifdef CONFIG_DEBUG_RCU_STALL + unsigned long gp_check; /* Time grace period should end, in seconds. */ +#endif /* #ifdef CONFIG_DEBUG_RCU_STALL */ int signaled; diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c index d427114..d7ec731 100644 --- a/kernel/rcuclassic.c +++ b/kernel/rcuclassic.c @@ -47,6 +47,7 @@ #include #include #include +#include #ifdef CONFIG_DEBUG_LOCK_ALLOC static struct lock_class_key rcu_lock_key; @@ -286,6 +287,81 @@ static void rcu_do_batch(struct rcu_data *rdp) * rcu_check_quiescent_state calls rcu_start_batch(0) to start the next grace * period (if necessary). */ + +#ifdef CONFIG_DEBUG_RCU_STALL + +static inline void record_gp_check_time(struct rcu_ctrlblk *rcp) +{ + rcp->gp_check = get_seconds() + 3; +} +static void print_other_cpu_stall(struct rcu_ctrlblk *rcp) +{ + int cpu; + long delta; + + /* Only let one CPU complain about others per time interval. */ + + spin_lock(&rcp->lock); + delta = get_seconds() - rcp->gp_check; + if (delta < 2L || + cpus_empty(rcp->cpumask)) { + spin_unlock(&rcp->lock); + return; + rcp->gp_check = get_seconds() + 30; + } + spin_unlock(&rcp->lock); + + /* OK, time to rat on our buddy... */ + + printk(KERN_ERR "RCU detected CPU stalls:"); + for_each_cpu_mask(cpu, rcp->cpumask) + printk(" %d", cpu); + printk(" (detected by %d, t=%lu/%lu)\n", + smp_processor_id(), get_seconds(), rcp->gp_check); +} +static void print_cpu_stall(struct rcu_ctrlblk *rcp) +{ + printk(KERN_ERR "RCU detected CPU %d stall (t=%lu/%lu)\n", + smp_processor_id(), get_seconds(), rcp->gp_check); + dump_stack(); + spin_lock(&rcp->lock); + if ((long)(get_seconds() - rcp->gp_check) >= 0L) + rcp->gp_check = get_seconds() + 30; + spin_unlock(&rcp->lock); +} +static inline void check_cpu_stall(struct rcu_ctrlblk *rcp, + struct rcu_data *rdp) +{ + long delta; + + delta = get_seconds() - rcp->gp_check; + if (cpu_isset(smp_processor_id(), rcp->cpumask) && delta >= 0L) { + + /* We haven't checked in, so go dump stack. */ + + print_cpu_stall(rcp); + + } else if (!cpus_empty(rcp->cpumask) && delta >= 2L) { + + /* They had two seconds to dump stack, so complain. */ + + print_other_cpu_stall(rcp); + + } +} + +#else /* #ifdef CONFIG_DEBUG_RCU_STALL */ + +static inline void record_gp_check_time(struct rcu_ctrlblk *rcp) +{ +} +static inline void check_cpu_stall(struct rcu_ctrlblk *rcp, + struct rcu_data *rdp) +{ +} + +#endif /* #else #ifdef CONFIG_DEBUG_RCU_STALL */ + /* * Register a new batch of callbacks, and start it up if there is currently no * active batch and the batch to be registered has not already occurred. @@ -296,6 +372,7 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp) if (rcp->cur != rcp->pending && rcp->completed == rcp->cur) { rcp->cur++; + record_gp_check_time(rcp); /* * Accessing nohz_cpu_mask before incrementing rcp->cur needs a @@ -489,6 +566,9 @@ static void rcu_process_callbacks(struct softirq_action *unused) static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp) { + /* Check for CPU stalls, if enabled. */ + check_cpu_stall(rcp, rdp); + if (rdp->nxtlist) { /* * This cpu has pending rcu entries and the grace period diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index e1d4764..2fb6d90 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -597,6 +597,19 @@ config RCU_TORTURE_TEST_RUNNABLE Say N here if you want the RCU torture tests to start only after being manually enabled via /proc. +config RCU_CPU_STALL + bool "Check for stalled CPUs delaying RCU grace periods" + depends on CLASSIC_RCU + default n + help + This option causes RCU to printk information on which + CPUs are delaying the current grace period, but only when + the grace period extends for excessive time periods. + + Say Y if you want RCU to perform such checks. + + Say N if you are unsure. + config KPROBES_SANITY_TEST bool "Kprobes sanity tests" depends on DEBUG_KERNEL -- cgit v0.10.2 From 78635fc739b1254f3e0362ac430edbdd2cff01dc Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 11 Aug 2008 13:34:15 +0200 Subject: rcu, debug: detect stalled grace periods, cleanups small cleanups. Signed-off-by: Ingo Molnar diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c index d7ec731..56b8712 100644 --- a/kernel/rcuclassic.c +++ b/kernel/rcuclassic.c @@ -294,6 +294,7 @@ static inline void record_gp_check_time(struct rcu_ctrlblk *rcp) { rcp->gp_check = get_seconds() + 3; } + static void print_other_cpu_stall(struct rcu_ctrlblk *rcp) { int cpu; @@ -303,11 +304,9 @@ static void print_other_cpu_stall(struct rcu_ctrlblk *rcp) spin_lock(&rcp->lock); delta = get_seconds() - rcp->gp_check; - if (delta < 2L || - cpus_empty(rcp->cpumask)) { + if (delta < 2L || cpus_empty(rcp->cpumask)) { spin_unlock(&rcp->lock); return; - rcp->gp_check = get_seconds() + 30; } spin_unlock(&rcp->lock); @@ -319,6 +318,7 @@ static void print_other_cpu_stall(struct rcu_ctrlblk *rcp) printk(" (detected by %d, t=%lu/%lu)\n", smp_processor_id(), get_seconds(), rcp->gp_check); } + static void print_cpu_stall(struct rcu_ctrlblk *rcp) { printk(KERN_ERR "RCU detected CPU %d stall (t=%lu/%lu)\n", @@ -329,8 +329,8 @@ static void print_cpu_stall(struct rcu_ctrlblk *rcp) rcp->gp_check = get_seconds() + 30; spin_unlock(&rcp->lock); } -static inline void check_cpu_stall(struct rcu_ctrlblk *rcp, - struct rcu_data *rdp) + +static void check_cpu_stall(struct rcu_ctrlblk *rcp, struct rcu_data *rdp) { long delta; @@ -341,12 +341,11 @@ static inline void check_cpu_stall(struct rcu_ctrlblk *rcp, print_cpu_stall(rcp); - } else if (!cpus_empty(rcp->cpumask) && delta >= 2L) { - - /* They had two seconds to dump stack, so complain. */ - - print_other_cpu_stall(rcp); - + } else { + if (!cpus_empty(rcp->cpumask) && delta >= 2L) { + /* They had two seconds to dump stack, so complain. */ + print_other_cpu_stall(rcp); + } } } @@ -355,8 +354,9 @@ static inline void check_cpu_stall(struct rcu_ctrlblk *rcp, static inline void record_gp_check_time(struct rcu_ctrlblk *rcp) { } -static inline void check_cpu_stall(struct rcu_ctrlblk *rcp, - struct rcu_data *rdp) + +static inline void +check_cpu_stall(struct rcu_ctrlblk *rcp, struct rcu_data *rdp) { } -- cgit v0.10.2 From b845b517b5e3706a3729f6ea83b88ab85f0725b0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 8 Aug 2008 21:47:09 +0200 Subject: printk: robustify printk Avoid deadlocks against rq->lock and xtime_lock by deferring the klogd wakeup by polling from the timer tick. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar diff --git a/include/linux/kernel.h b/include/linux/kernel.h index aaa998f..113ac8d 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -200,6 +200,8 @@ extern struct ratelimit_state printk_ratelimit_state; extern int printk_ratelimit(void); extern bool printk_timed_ratelimit(unsigned long *caller_jiffies, unsigned int interval_msec); +extern void printk_tick(void); +extern int printk_needs_cpu(int); #else static inline int vprintk(const char *s, va_list args) __attribute__ ((format (printf, 1, 0))); @@ -211,6 +213,8 @@ static inline int printk_ratelimit(void) { return 0; } static inline bool printk_timed_ratelimit(unsigned long *caller_jiffies, \ unsigned int interval_msec) \ { return false; } +static inline void printk_tick(void) { } +static inline int printk_needs_cpu(int) { return 0; } #endif extern void asmlinkage __attribute__((format(printf, 1, 2))) diff --git a/kernel/printk.c b/kernel/printk.c index b51b156..655cc2c 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -982,10 +982,25 @@ int is_console_locked(void) return console_locked; } -void wake_up_klogd(void) +static DEFINE_PER_CPU(int, printk_pending); + +void printk_tick(void) { - if (!oops_in_progress && waitqueue_active(&log_wait)) + if (__get_cpu_var(printk_pending)) { + __get_cpu_var(printk_pending) = 0; wake_up_interruptible(&log_wait); + } +} + +int printk_needs_cpu(int cpu) +{ + return per_cpu(printk_pending, cpu); +} + +void wake_up_klogd(void) +{ + if (waitqueue_active(&log_wait)) + __get_cpu_var(printk_pending) = 1; } /** diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 825b4c0..c13d4f1 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -255,7 +255,7 @@ void tick_nohz_stop_sched_tick(int inidle) next_jiffies = get_next_timer_interrupt(last_jiffies); delta_jiffies = next_jiffies - last_jiffies; - if (rcu_needs_cpu(cpu)) + if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu)) delta_jiffies = 1; /* * Do not stop the tick, if we are only one off diff --git a/kernel/timer.c b/kernel/timer.c index 03bc7f1..510fe69 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -978,6 +978,7 @@ void update_process_times(int user_tick) run_local_timers(); if (rcu_pending(cpu)) rcu_check_callbacks(cpu, user_tick); + printk_tick(); scheduler_tick(); run_posix_cpu_timers(p); } -- cgit v0.10.2 From ced9cd40ac14111befd6b0c73ec90106c22a3fd7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 11 Aug 2008 14:38:12 +0200 Subject: printk: robustify printk, fix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix: include/linux/kernel.h: In function ‘printk_needs_cpu': include/linux/kernel.h:217: error: parameter name omitted Signed-off-by: Ingo Molnar diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 113ac8d..3652a45 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -200,8 +200,6 @@ extern struct ratelimit_state printk_ratelimit_state; extern int printk_ratelimit(void); extern bool printk_timed_ratelimit(unsigned long *caller_jiffies, unsigned int interval_msec); -extern void printk_tick(void); -extern int printk_needs_cpu(int); #else static inline int vprintk(const char *s, va_list args) __attribute__ ((format (printf, 1, 0))); @@ -213,10 +211,11 @@ static inline int printk_ratelimit(void) { return 0; } static inline bool printk_timed_ratelimit(unsigned long *caller_jiffies, \ unsigned int interval_msec) \ { return false; } -static inline void printk_tick(void) { } -static inline int printk_needs_cpu(int) { return 0; } #endif +extern int printk_needs_cpu(int cpu); +extern void printk_tick(void); + extern void asmlinkage __attribute__((format(printf, 1, 2))) early_printk(const char *fmt, ...); -- cgit v0.10.2 From 2ae111cdd8d83ebf9de72e36e68a8c84b6ebbeea Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 11 Aug 2008 18:34:08 +0400 Subject: x86: apic interrupts - move assignments to irqinit_32.c, v2 64bit mode APIC interrupt handlers are set within irqinit_64.c. Lets do tha same for 32bit mode which would help in furter code merging. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index f93c18f..9e341c9 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1354,54 +1354,6 @@ void smp_error_interrupt(struct pt_regs *regs) irq_exit(); } -#ifdef CONFIG_SMP -void __init smp_intr_init(void) -{ - /* - * IRQ0 must be given a fixed assignment and initialized, - * because it's used before the IO-APIC is set up. - */ - set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]); - - /* - * The reschedule interrupt is a CPU-to-CPU reschedule-helper - * IPI, driven by wakeup. - */ - alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); - - /* IPI for invalidation */ - alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt); - - /* IPI for generic function call */ - alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); - - /* IPI for single call function */ - set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, - call_function_single_interrupt); -} -#endif - -/* - * Initialize APIC interrupts - */ -void __init apic_intr_init(void) -{ -#ifdef CONFIG_SMP - smp_intr_init(); -#endif - /* self generated IPI for local APIC timer */ - alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); - - /* IPI vectors for APIC spurious and error interrupts */ - alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); - alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); - - /* thermal monitor LVT interrupt */ -#ifdef CONFIG_X86_MCE_P4THERMAL - alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); -#endif -} - /** * connect_bsp_APIC - attach the APIC to the interrupt system */ diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index d669142..9200a1e 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c @@ -74,6 +74,15 @@ void __init init_ISA_irqs (void) } } +/* + * IRQ2 is cascade interrupt to second interrupt controller + */ +static struct irqaction irq2 = { + .handler = no_action, + .mask = CPU_MASK_NONE, + .name = "cascade", +}; + /* Overridden in paravirt.c */ void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); @@ -98,6 +107,46 @@ void __init native_init_IRQ(void) set_intr_gate(vector, interrupt[i]); } +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP) + /* + * IRQ0 must be given a fixed assignment and initialized, + * because it's used before the IO-APIC is set up. + */ + set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]); + + /* + * The reschedule interrupt is a CPU-to-CPU reschedule-helper + * IPI, driven by wakeup. + */ + alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); + + /* IPI for invalidation */ + alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt); + + /* IPI for generic function call */ + alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); + + /* IPI for single call function */ + set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt); +#endif + +#ifdef CONFIG_X86_LOCAL_APIC + /* self generated IPI for local APIC timer */ + alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); + + /* IPI vectors for APIC spurious and error interrupts */ + alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); + alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); +#endif + +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_MCE_P4THERMAL) + /* thermal monitor LVT interrupt */ + alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); +#endif + + if (!acpi_ioapic) + setup_irq(2, &irq2); + /* setup after call gates are initialised (usually add in * the architecture specific gates) */ diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c index 3d31783..b00f5ad 100644 --- a/arch/x86/mach-default/setup.c +++ b/arch/x86/mach-default/setup.c @@ -36,15 +36,6 @@ void __init pre_intr_init_hook(void) init_ISA_irqs(); } -/* - * IRQ2 is cascade interrupt to second interrupt controller - */ -static struct irqaction irq2 = { - .handler = no_action, - .mask = CPU_MASK_NONE, - .name = "cascade", -}; - /** * intr_init_hook - post gate setup interrupt initialisation * @@ -60,12 +51,6 @@ void __init intr_init_hook(void) if (x86_quirks->arch_intr_init()) return; } -#ifdef CONFIG_X86_LOCAL_APIC - apic_intr_init(); -#endif - - if (!acpi_ioapic) - setup_irq(2, &irq2); } /** diff --git a/include/asm-x86/arch_hooks.h b/include/asm-x86/arch_hooks.h index 72adc3a..de4596b 100644 --- a/include/asm-x86/arch_hooks.h +++ b/include/asm-x86/arch_hooks.h @@ -12,8 +12,6 @@ /* these aren't arch hooks, they are generic routines * that can be used by the hooks */ extern void init_ISA_irqs(void); -extern void apic_intr_init(void); -extern void smp_intr_init(void); extern irqreturn_t timer_interrupt(int irq, void *dev_id); /* these are the defined hooks */ -- cgit v0.10.2 From 7c13e6a3d15a4ebcc3f40df5f4d19665479f8ca3 Mon Sep 17 00:00:00 2001 From: Dimitri Sivanich Date: Mon, 11 Aug 2008 10:46:46 -0500 Subject: x86: remove EXPERIMENTAL restriction from CONFIG_HOTPLUG_CPU This removes the EXPERIMENTAL restriction from CONFIG_HOTPLUG_CPU on the x86 architecture. Signed-off-by: Dimitri Sivanich Signed-off-by: Ingo Molnar diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 3d0f2b6..7917962 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1371,14 +1371,14 @@ config PHYSICAL_ALIGN Don't change this unless you know what you are doing. config HOTPLUG_CPU - bool "Support for suspend on SMP and hot-pluggable CPUs (EXPERIMENTAL)" - depends on SMP && HOTPLUG && EXPERIMENTAL && !X86_VOYAGER + bool "Support for hot-pluggable CPUs" + depends on SMP && HOTPLUG && !X86_VOYAGER ---help--- - Say Y here to experiment with turning CPUs off and on, and to - enable suspend on SMP systems. CPUs can be controlled through - /sys/devices/system/cpu. - Say N if you want to disable CPU hotplug and don't need to - suspend. + Say Y here to allow turning CPUs off and on. CPUs can be + controlled through /sys/devices/system/cpu. + ( Note: power management support will enable this option + automatically on SMP systems. ) + Say N if you want to disable CPU hotplug. config COMPAT_VDSO def_bool y -- cgit v0.10.2 From 012f09e7942716d5f4339f1fd9a831a485bb1d4a Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Wed, 6 Aug 2008 16:23:08 +0200 Subject: x86: compile pat debugfs interface only if CONFIG_X86_PAT is set Recently I've run a kernel w/o PAT support and wondered why there was a file "x86/pat_memtype_list" in debugfs. Of course it's empty if PAT is disabled ... so just get rid of this interface if PAT is disabled. Signed-off-by: Andreas Herrmann Signed-off-by: Ingo Molnar diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 2fe3091..647b1c4 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -492,7 +492,7 @@ void unmap_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot) free_memtype(addr, addr + size); } -#if defined(CONFIG_DEBUG_FS) +#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT) /* get Nth element of the linked list */ static struct memtype *memtype_get_idx(loff_t pos) @@ -576,4 +576,4 @@ static int __init pat_memtype_list_init(void) late_initcall(pat_memtype_list_init); -#endif /* CONFIG_DEBUG_FS */ +#endif /* CONFIG_DEBUG_FS && CONFIG_X86_PAT */ -- cgit v0.10.2 From 59f09ba2b62e6f89beeb4c8fc2c83fe14321dda9 Mon Sep 17 00:00:00 2001 From: Philipp Kohlbecher Date: Wed, 6 Aug 2008 15:25:26 +0200 Subject: x86: fix comment in protected mode header Comments in arch/x86/boot/compressed/head_32.S erroneously refer to the real mode pointer as the second and the heap area as the third argument to decompress_kernel(). In fact, these have been the first and second argument, respectively, since v2.6.20. This patch corrects the comments. It introduces no code changes. Signed-off-by: Philipp Kohlbecher Signed-off-by: Ingo Molnar diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index ba7736c..29c5fbf 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -137,14 +137,15 @@ relocated: */ movl output_len(%ebx), %eax pushl %eax + # push arguments for decompress_kernel: pushl %ebp # output address movl input_len(%ebx), %eax pushl %eax # input_len leal input_data(%ebx), %eax pushl %eax # input_data leal boot_heap(%ebx), %eax - pushl %eax # heap area as third argument - pushl %esi # real mode pointer as second arg + pushl %eax # heap area + pushl %esi # real mode pointer call decompress_kernel addl $20, %esp popl %ecx -- cgit v0.10.2 From 49800efcb17afdf973f33e8aa8807b7f83993cc6 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Wed, 6 Aug 2008 10:27:30 +0200 Subject: x86: pda_init(): fix memory leak when using CPU hotplug pda->irqstackptr is allocated whenever a CPU is set online. But it is never freed. This results in a memory leak of 16K for each CPU offline/online cycle. Fix is to allocate pda->irqstackptr only once. Signed-off-by: Andreas Herrmann Cc: akpm@linux-foundation.org Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 6f9b892..8396fd7 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -493,17 +493,20 @@ void pda_init(int cpu) /* others are initialized in smpboot.c */ pda->pcurrent = &init_task; pda->irqstackptr = boot_cpu_stack; + pda->irqstackptr += IRQSTACKSIZE - 64; } else { - pda->irqstackptr = (char *) - __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); - if (!pda->irqstackptr) - panic("cannot allocate irqstack for cpu %d", cpu); + if (!pda->irqstackptr) { + pda->irqstackptr = (char *) + __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); + if (!pda->irqstackptr) + panic("cannot allocate irqstack for cpu %d", + cpu); + pda->irqstackptr += IRQSTACKSIZE - 64; + } if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) pda->nodenumber = cpu_to_node(cpu); } - - pda->irqstackptr += IRQSTACKSIZE-64; } char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + -- cgit v0.10.2 From b55793f7528ce1b73c25b3ac8a86a6cda2a0f9a4 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Wed, 6 Aug 2008 10:29:37 +0200 Subject: x86: cpu_init(): fix memory leak when using CPU hotplug Exception stacks are allocated each time a CPU is set online. But the allocated space is never freed. Thus with one CPU hotplug offline/online cycle there is a memory leak of 24K (6 pages) for a CPU. Fix is to allocate exception stacks only once -- when the CPU is set online for the first time. Signed-off-by: Andreas Herrmann Cc: akpm@linux-foundation.org Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 8396fd7..cc6efe8 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -606,19 +606,22 @@ void __cpuinit cpu_init(void) /* * set up and load the per-CPU TSS */ - for (v = 0; v < N_EXCEPTION_STACKS; v++) { + if (!orig_ist->ist[0]) { static const unsigned int order[N_EXCEPTION_STACKS] = { - [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, - [DEBUG_STACK - 1] = DEBUG_STACK_ORDER + [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, + [DEBUG_STACK - 1] = DEBUG_STACK_ORDER }; - if (cpu) { - estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); - if (!estacks) - panic("Cannot allocate exception stack %ld %d\n", - v, cpu); + for (v = 0; v < N_EXCEPTION_STACKS; v++) { + if (cpu) { + estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); + if (!estacks) + panic("Cannot allocate exception " + "stack %ld %d\n", v, cpu); + } + estacks += PAGE_SIZE << order[v]; + orig_ist->ist[v] = t->x86_tss.ist[v] = + (unsigned long)estacks; } - estacks += PAGE_SIZE << order[v]; - orig_ist->ist[v] = t->x86_tss.ist[v] = (unsigned long)estacks; } t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); -- cgit v0.10.2 From 99809963c99e1ed868d9ebeb4a5e7ee1cbe0309f Mon Sep 17 00:00:00 2001 From: Jeff Chua Date: Wed, 6 Aug 2008 19:09:53 +0800 Subject: x86: make sparsemem more available With CONFIG_X86_PC, I can set CONFIG_SPARSEMEM=y. With CONFIG_X86_GENERICARCH, CONFIG_SPARSEMEM depends on CONFIG_NUMA. I'm using the patch below to enable sparsemem instead of flatmem. System booted and is running. Signed-off-by: Ingo Molnar diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 7917962..e2305c7 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1035,7 +1035,7 @@ config HAVE_ARCH_ALLOC_REMAP config ARCH_FLATMEM_ENABLE def_bool y - depends on X86_32 && ARCH_SELECT_MEMORY_MODEL && X86_PC && !NUMA + depends on X86_32 && ARCH_SELECT_MEMORY_MODEL && !NUMA config ARCH_DISCONTIGMEM_ENABLE def_bool y @@ -1051,7 +1051,7 @@ config ARCH_SPARSEMEM_DEFAULT config ARCH_SPARSEMEM_ENABLE def_bool y - depends on X86_64 || NUMA || (EXPERIMENTAL && X86_PC) + depends on X86_64 || NUMA || (EXPERIMENTAL && X86_PC) || X86_GENERICARCH select SPARSEMEM_STATIC if X86_32 select SPARSEMEM_VMEMMAP_ENABLE if X86_64 -- cgit v0.10.2 From c1e817d03a7de57a963654c35e6e80af9a5dbff5 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 22 Jul 2008 22:58:03 +0100 Subject: GFS2: Fix debugfs glock file iterator Due to an incorrect iterator, some glocks were being missed from the glock dumps obtained via debugfs. This patch fixes the problem and ensures that we don't miss any glocks in future. Signed-off-by: Steven Whitehouse diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 13391e5..4cbb695 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1816,15 +1816,17 @@ restart: if (gl) { gi->gl = hlist_entry(gl->gl_list.next, struct gfs2_glock, gl_list); - if (gi->gl) - gfs2_glock_hold(gi->gl); + } else { + gi->gl = hlist_entry(gl_hash_table[gi->hash].hb_list.first, + struct gfs2_glock, gl_list); } + if (gi->gl) + gfs2_glock_hold(gi->gl); read_unlock(gl_lock_addr(gi->hash)); if (gl) gfs2_glock_put(gl); - if (gl && gi->gl == NULL) - gi->hash++; while (gi->gl == NULL) { + gi->hash++; if (gi->hash >= GFS2_GL_HASH_SIZE) return 1; read_lock(gl_lock_addr(gi->hash)); @@ -1833,7 +1835,6 @@ restart: if (gi->gl) gfs2_glock_hold(gi->gl); read_unlock(gl_lock_addr(gi->hash)); - gi->hash++; } if (gi->sdp != gi->gl->gl_sbd) -- cgit v0.10.2 From 9b8df98fc8973ad1c5f0d7c4cf71c7fb84fe22c5 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 8 Aug 2008 13:45:13 +0100 Subject: GFS2: Fix metafs mounts This patch is intended to fix the issues reported in bz #457798. Instead of having the metafs as a separate filesystem, it becomes a second root of gfs2. As a result it will appear as type gfs2 in /proc/mounts, but it is still possible (for backwards compatibility purposes) to mount it as type gfs2meta. A new mount flag "meta" is introduced so that its possible to tell the two cases apart in /proc/mounts. As a result it becomes possible to mount type gfs2 with -o meta and get the same result as mounting type gfs2meta. So it is possible to mount just the metafs on its own. Currently if you do this, its then impossible to mount the "normal" root of the gfs2 filesystem without first unmounting the metafs root. I'm not sure if thats a feature or a bug :-) Either way, this is a great improvement on the previous scheme and I've verified that it works ok with bind mounts on both the "normal" root and the metafs root in various combinations. There were also a bunch of functions in super.c which didn't belong there, so this moves them into ops_fstype.c where they can be static. Hopefully the mount/umount sequence is now more obvious as a result. Signed-off-by: Steven Whitehouse Cc: Alexander Viro diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 448697a..a1777a1 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -400,6 +400,7 @@ struct gfs2_args { int ar_quota; /* off/account/on */ int ar_suiddir; /* suiddir support */ int ar_data; /* ordered/writeback */ + int ar_meta; /* mount metafs */ }; struct gfs2_tune { @@ -461,7 +462,6 @@ struct gfs2_sb_host { struct gfs2_sbd { struct super_block *sd_vfs; - struct super_block *sd_vfs_meta; struct kobject sd_kobj; unsigned long sd_flags; /* SDF_... */ struct gfs2_sb_host sd_sb; @@ -499,7 +499,9 @@ struct gfs2_sbd { /* Inode Stuff */ - struct inode *sd_master_dir; + struct dentry *sd_master_dir; + struct dentry *sd_root_dir; + struct inode *sd_jindex; struct inode *sd_inum_inode; struct inode *sd_statfs_inode; @@ -634,7 +636,6 @@ struct gfs2_sbd { /* Debugging crud */ unsigned long sd_last_warning; - struct vfsmount *sd_gfs2mnt; struct dentry *debugfs_dir; /* debugfs directory */ struct dentry *debugfs_dentry_glocks; /* for debugfs */ }; diff --git a/fs/gfs2/mount.c b/fs/gfs2/mount.c index b941f9f..df48333 100644 --- a/fs/gfs2/mount.c +++ b/fs/gfs2/mount.c @@ -42,6 +42,7 @@ enum { Opt_nosuiddir, Opt_data_writeback, Opt_data_ordered, + Opt_meta, Opt_err, }; @@ -66,6 +67,7 @@ static match_table_t tokens = { {Opt_nosuiddir, "nosuiddir"}, {Opt_data_writeback, "data=writeback"}, {Opt_data_ordered, "data=ordered"}, + {Opt_meta, "meta"}, {Opt_err, NULL} }; @@ -239,6 +241,11 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount) case Opt_data_ordered: args->ar_data = GFS2_DATA_ORDERED; break; + case Opt_meta: + if (remount && args->ar_meta != 1) + goto cant_remount; + args->ar_meta = 1; + break; case Opt_err: default: fs_info(sdp, "unknown option: %s\n", o); diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index b4d1d64..a6225cc 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -40,6 +40,45 @@ #define DO 0 #define UNDO 1 +static const u32 gfs2_old_fs_formats[] = { + 0 +}; + +static const u32 gfs2_old_multihost_formats[] = { + 0 +}; + +/** + * gfs2_tune_init - Fill a gfs2_tune structure with default values + * @gt: tune + * + */ + +static void gfs2_tune_init(struct gfs2_tune *gt) +{ + spin_lock_init(>->gt_spin); + + gt->gt_demote_secs = 300; + gt->gt_incore_log_blocks = 1024; + gt->gt_log_flush_secs = 60; + gt->gt_recoverd_secs = 60; + gt->gt_logd_secs = 1; + gt->gt_quotad_secs = 5; + gt->gt_quota_simul_sync = 64; + gt->gt_quota_warn_period = 10; + gt->gt_quota_scale_num = 1; + gt->gt_quota_scale_den = 1; + gt->gt_quota_cache_secs = 300; + gt->gt_quota_quantum = 60; + gt->gt_atime_quantum = 3600; + gt->gt_new_files_jdata = 0; + gt->gt_max_readahead = 1 << 18; + gt->gt_stall_secs = 600; + gt->gt_complain_secs = 10; + gt->gt_statfs_quantum = 30; + gt->gt_statfs_slow = 0; +} + static struct gfs2_sbd *init_sbd(struct super_block *sb) { struct gfs2_sbd *sdp; @@ -113,6 +152,272 @@ static void init_vfs(struct super_block *sb, unsigned noatime) sb->s_flags |= MS_NOATIME | MS_NODIRATIME; } +/** + * gfs2_check_sb - Check superblock + * @sdp: the filesystem + * @sb: The superblock + * @silent: Don't print a message if the check fails + * + * Checks the version code of the FS is one that we understand how to + * read and that the sizes of the various on-disk structures have not + * changed. + */ + +static int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent) +{ + unsigned int x; + + if (sb->sb_magic != GFS2_MAGIC || + sb->sb_type != GFS2_METATYPE_SB) { + if (!silent) + printk(KERN_WARNING "GFS2: not a GFS2 filesystem\n"); + return -EINVAL; + } + + /* If format numbers match exactly, we're done. */ + + if (sb->sb_fs_format == GFS2_FORMAT_FS && + sb->sb_multihost_format == GFS2_FORMAT_MULTI) + return 0; + + if (sb->sb_fs_format != GFS2_FORMAT_FS) { + for (x = 0; gfs2_old_fs_formats[x]; x++) + if (gfs2_old_fs_formats[x] == sb->sb_fs_format) + break; + + if (!gfs2_old_fs_formats[x]) { + printk(KERN_WARNING + "GFS2: code version (%u, %u) is incompatible " + "with ondisk format (%u, %u)\n", + GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, + sb->sb_fs_format, sb->sb_multihost_format); + printk(KERN_WARNING + "GFS2: I don't know how to upgrade this FS\n"); + return -EINVAL; + } + } + + if (sb->sb_multihost_format != GFS2_FORMAT_MULTI) { + for (x = 0; gfs2_old_multihost_formats[x]; x++) + if (gfs2_old_multihost_formats[x] == + sb->sb_multihost_format) + break; + + if (!gfs2_old_multihost_formats[x]) { + printk(KERN_WARNING + "GFS2: code version (%u, %u) is incompatible " + "with ondisk format (%u, %u)\n", + GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, + sb->sb_fs_format, sb->sb_multihost_format); + printk(KERN_WARNING + "GFS2: I don't know how to upgrade this FS\n"); + return -EINVAL; + } + } + + if (!sdp->sd_args.ar_upgrade) { + printk(KERN_WARNING + "GFS2: code version (%u, %u) is incompatible " + "with ondisk format (%u, %u)\n", + GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, + sb->sb_fs_format, sb->sb_multihost_format); + printk(KERN_INFO + "GFS2: Use the \"upgrade\" mount option to upgrade " + "the FS\n"); + printk(KERN_INFO "GFS2: See the manual for more details\n"); + return -EINVAL; + } + + return 0; +} + +static void end_bio_io_page(struct bio *bio, int error) +{ + struct page *page = bio->bi_private; + + if (!error) + SetPageUptodate(page); + else + printk(KERN_WARNING "gfs2: error %d reading superblock\n", error); + unlock_page(page); +} + +static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf) +{ + const struct gfs2_sb *str = buf; + + sb->sb_magic = be32_to_cpu(str->sb_header.mh_magic); + sb->sb_type = be32_to_cpu(str->sb_header.mh_type); + sb->sb_format = be32_to_cpu(str->sb_header.mh_format); + sb->sb_fs_format = be32_to_cpu(str->sb_fs_format); + sb->sb_multihost_format = be32_to_cpu(str->sb_multihost_format); + sb->sb_bsize = be32_to_cpu(str->sb_bsize); + sb->sb_bsize_shift = be32_to_cpu(str->sb_bsize_shift); + sb->sb_master_dir.no_addr = be64_to_cpu(str->sb_master_dir.no_addr); + sb->sb_master_dir.no_formal_ino = be64_to_cpu(str->sb_master_dir.no_formal_ino); + sb->sb_root_dir.no_addr = be64_to_cpu(str->sb_root_dir.no_addr); + sb->sb_root_dir.no_formal_ino = be64_to_cpu(str->sb_root_dir.no_formal_ino); + + memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN); + memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN); +} + +/** + * gfs2_read_super - Read the gfs2 super block from disk + * @sdp: The GFS2 super block + * @sector: The location of the super block + * @error: The error code to return + * + * This uses the bio functions to read the super block from disk + * because we want to be 100% sure that we never read cached data. + * A super block is read twice only during each GFS2 mount and is + * never written to by the filesystem. The first time its read no + * locks are held, and the only details which are looked at are those + * relating to the locking protocol. Once locking is up and working, + * the sb is read again under the lock to establish the location of + * the master directory (contains pointers to journals etc) and the + * root directory. + * + * Returns: 0 on success or error + */ + +static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector) +{ + struct super_block *sb = sdp->sd_vfs; + struct gfs2_sb *p; + struct page *page; + struct bio *bio; + + page = alloc_page(GFP_NOFS); + if (unlikely(!page)) + return -ENOBUFS; + + ClearPageUptodate(page); + ClearPageDirty(page); + lock_page(page); + + bio = bio_alloc(GFP_NOFS, 1); + if (unlikely(!bio)) { + __free_page(page); + return -ENOBUFS; + } + + bio->bi_sector = sector * (sb->s_blocksize >> 9); + bio->bi_bdev = sb->s_bdev; + bio_add_page(bio, page, PAGE_SIZE, 0); + + bio->bi_end_io = end_bio_io_page; + bio->bi_private = page; + submit_bio(READ_SYNC | (1 << BIO_RW_META), bio); + wait_on_page_locked(page); + bio_put(bio); + if (!PageUptodate(page)) { + __free_page(page); + return -EIO; + } + p = kmap(page); + gfs2_sb_in(&sdp->sd_sb, p); + kunmap(page); + __free_page(page); + return 0; +} +/** + * gfs2_read_sb - Read super block + * @sdp: The GFS2 superblock + * @gl: the glock for the superblock (assumed to be held) + * @silent: Don't print message if mount fails + * + */ + +static int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent) +{ + u32 hash_blocks, ind_blocks, leaf_blocks; + u32 tmp_blocks; + unsigned int x; + int error; + + error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift); + if (error) { + if (!silent) + fs_err(sdp, "can't read superblock\n"); + return error; + } + + error = gfs2_check_sb(sdp, &sdp->sd_sb, silent); + if (error) + return error; + + sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - + GFS2_BASIC_BLOCK_SHIFT; + sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift; + sdp->sd_diptrs = (sdp->sd_sb.sb_bsize - + sizeof(struct gfs2_dinode)) / sizeof(u64); + sdp->sd_inptrs = (sdp->sd_sb.sb_bsize - + sizeof(struct gfs2_meta_header)) / sizeof(u64); + sdp->sd_jbsize = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header); + sdp->sd_hash_bsize = sdp->sd_sb.sb_bsize / 2; + sdp->sd_hash_bsize_shift = sdp->sd_sb.sb_bsize_shift - 1; + sdp->sd_hash_ptrs = sdp->sd_hash_bsize / sizeof(u64); + sdp->sd_qc_per_block = (sdp->sd_sb.sb_bsize - + sizeof(struct gfs2_meta_header)) / + sizeof(struct gfs2_quota_change); + + /* Compute maximum reservation required to add a entry to a directory */ + + hash_blocks = DIV_ROUND_UP(sizeof(u64) * (1 << GFS2_DIR_MAX_DEPTH), + sdp->sd_jbsize); + + ind_blocks = 0; + for (tmp_blocks = hash_blocks; tmp_blocks > sdp->sd_diptrs;) { + tmp_blocks = DIV_ROUND_UP(tmp_blocks, sdp->sd_inptrs); + ind_blocks += tmp_blocks; + } + + leaf_blocks = 2 + GFS2_DIR_MAX_DEPTH; + + sdp->sd_max_dirres = hash_blocks + ind_blocks + leaf_blocks; + + sdp->sd_heightsize[0] = sdp->sd_sb.sb_bsize - + sizeof(struct gfs2_dinode); + sdp->sd_heightsize[1] = sdp->sd_sb.sb_bsize * sdp->sd_diptrs; + for (x = 2;; x++) { + u64 space, d; + u32 m; + + space = sdp->sd_heightsize[x - 1] * sdp->sd_inptrs; + d = space; + m = do_div(d, sdp->sd_inptrs); + + if (d != sdp->sd_heightsize[x - 1] || m) + break; + sdp->sd_heightsize[x] = space; + } + sdp->sd_max_height = x; + sdp->sd_heightsize[x] = ~0; + gfs2_assert(sdp, sdp->sd_max_height <= GFS2_MAX_META_HEIGHT); + + sdp->sd_jheightsize[0] = sdp->sd_sb.sb_bsize - + sizeof(struct gfs2_dinode); + sdp->sd_jheightsize[1] = sdp->sd_jbsize * sdp->sd_diptrs; + for (x = 2;; x++) { + u64 space, d; + u32 m; + + space = sdp->sd_jheightsize[x - 1] * sdp->sd_inptrs; + d = space; + m = do_div(d, sdp->sd_inptrs); + + if (d != sdp->sd_jheightsize[x - 1] || m) + break; + sdp->sd_jheightsize[x] = space; + } + sdp->sd_max_jheight = x; + sdp->sd_jheightsize[x] = ~0; + gfs2_assert(sdp, sdp->sd_max_jheight <= GFS2_MAX_META_HEIGHT); + + return 0; +} + static int init_names(struct gfs2_sbd *sdp, int silent) { char *proto, *table; @@ -224,51 +529,59 @@ fail: return error; } -static inline struct inode *gfs2_lookup_root(struct super_block *sb, - u64 no_addr) +static int gfs2_lookup_root(struct super_block *sb, struct dentry **dptr, + u64 no_addr, const char *name) { - return gfs2_inode_lookup(sb, DT_DIR, no_addr, 0, 0); + struct gfs2_sbd *sdp = sb->s_fs_info; + struct dentry *dentry; + struct inode *inode; + + inode = gfs2_inode_lookup(sb, DT_DIR, no_addr, 0, 0); + if (IS_ERR(inode)) { + fs_err(sdp, "can't read in %s inode: %ld\n", name, PTR_ERR(inode)); + return PTR_ERR(inode); + } + dentry = d_alloc_root(inode); + if (!dentry) { + fs_err(sdp, "can't alloc %s dentry\n", name); + iput(inode); + return -ENOMEM; + } + dentry->d_op = &gfs2_dops; + *dptr = dentry; + return 0; } -static int init_sb(struct gfs2_sbd *sdp, int silent, int undo) +static int init_sb(struct gfs2_sbd *sdp, int silent) { struct super_block *sb = sdp->sd_vfs; struct gfs2_holder sb_gh; u64 no_addr; - struct inode *inode; - int error = 0; + int ret; - if (undo) { - if (sb->s_root) { - dput(sb->s_root); - sb->s_root = NULL; - } - return 0; + ret = gfs2_glock_nq_num(sdp, GFS2_SB_LOCK, &gfs2_meta_glops, + LM_ST_SHARED, 0, &sb_gh); + if (ret) { + fs_err(sdp, "can't acquire superblock glock: %d\n", ret); + return ret; } - error = gfs2_glock_nq_num(sdp, GFS2_SB_LOCK, &gfs2_meta_glops, - LM_ST_SHARED, 0, &sb_gh); - if (error) { - fs_err(sdp, "can't acquire superblock glock: %d\n", error); - return error; - } - - error = gfs2_read_sb(sdp, sb_gh.gh_gl, silent); - if (error) { - fs_err(sdp, "can't read superblock: %d\n", error); + ret = gfs2_read_sb(sdp, sb_gh.gh_gl, silent); + if (ret) { + fs_err(sdp, "can't read superblock: %d\n", ret); goto out; } /* Set up the buffer cache and SB for real */ if (sdp->sd_sb.sb_bsize < bdev_hardsect_size(sb->s_bdev)) { - error = -EINVAL; + ret = -EINVAL; fs_err(sdp, "FS block size (%u) is too small for device " "block size (%u)\n", sdp->sd_sb.sb_bsize, bdev_hardsect_size(sb->s_bdev)); goto out; } if (sdp->sd_sb.sb_bsize > PAGE_SIZE) { - error = -EINVAL; + ret = -EINVAL; fs_err(sdp, "FS block size (%u) is too big for machine " "page size (%u)\n", sdp->sd_sb.sb_bsize, (unsigned int)PAGE_SIZE); @@ -278,26 +591,21 @@ static int init_sb(struct gfs2_sbd *sdp, int silent, int undo) /* Get the root inode */ no_addr = sdp->sd_sb.sb_root_dir.no_addr; - if (sb->s_type == &gfs2meta_fs_type) - no_addr = sdp->sd_sb.sb_master_dir.no_addr; - inode = gfs2_lookup_root(sb, no_addr); - if (IS_ERR(inode)) { - error = PTR_ERR(inode); - fs_err(sdp, "can't read in root inode: %d\n", error); + ret = gfs2_lookup_root(sb, &sdp->sd_root_dir, no_addr, "root"); + if (ret) goto out; - } - sb->s_root = d_alloc_root(inode); - if (!sb->s_root) { - fs_err(sdp, "can't get root dentry\n"); - error = -ENOMEM; - iput(inode); - } else - sb->s_root->d_op = &gfs2_dops; - + /* Get the master inode */ + no_addr = sdp->sd_sb.sb_master_dir.no_addr; + ret = gfs2_lookup_root(sb, &sdp->sd_master_dir, no_addr, "master"); + if (ret) { + dput(sdp->sd_root_dir); + goto out; + } + sb->s_root = dget(sdp->sd_args.ar_meta ? sdp->sd_master_dir : sdp->sd_root_dir); out: gfs2_glock_dq_uninit(&sb_gh); - return error; + return ret; } /** @@ -372,6 +680,7 @@ static void gfs2_lm_others_may_mount(struct gfs2_sbd *sdp) static int init_journal(struct gfs2_sbd *sdp, int undo) { + struct inode *master = sdp->sd_master_dir->d_inode; struct gfs2_holder ji_gh; struct task_struct *p; struct gfs2_inode *ip; @@ -383,7 +692,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) goto fail_recoverd; } - sdp->sd_jindex = gfs2_lookup_simple(sdp->sd_master_dir, "jindex"); + sdp->sd_jindex = gfs2_lookup_simple(master, "jindex"); if (IS_ERR(sdp->sd_jindex)) { fs_err(sdp, "can't lookup journal index: %d\n", error); return PTR_ERR(sdp->sd_jindex); @@ -506,25 +815,17 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo) { int error = 0; struct gfs2_inode *ip; - struct inode *inode; + struct inode *master = sdp->sd_master_dir->d_inode; if (undo) goto fail_qinode; - inode = gfs2_lookup_root(sdp->sd_vfs, sdp->sd_sb.sb_master_dir.no_addr); - if (IS_ERR(inode)) { - error = PTR_ERR(inode); - fs_err(sdp, "can't read in master directory: %d\n", error); - goto fail; - } - sdp->sd_master_dir = inode; - error = init_journal(sdp, undo); if (error) - goto fail_master; + goto fail; /* Read in the master inode number inode */ - sdp->sd_inum_inode = gfs2_lookup_simple(sdp->sd_master_dir, "inum"); + sdp->sd_inum_inode = gfs2_lookup_simple(master, "inum"); if (IS_ERR(sdp->sd_inum_inode)) { error = PTR_ERR(sdp->sd_inum_inode); fs_err(sdp, "can't read in inum inode: %d\n", error); @@ -533,7 +834,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo) /* Read in the master statfs inode */ - sdp->sd_statfs_inode = gfs2_lookup_simple(sdp->sd_master_dir, "statfs"); + sdp->sd_statfs_inode = gfs2_lookup_simple(master, "statfs"); if (IS_ERR(sdp->sd_statfs_inode)) { error = PTR_ERR(sdp->sd_statfs_inode); fs_err(sdp, "can't read in statfs inode: %d\n", error); @@ -541,7 +842,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo) } /* Read in the resource index inode */ - sdp->sd_rindex = gfs2_lookup_simple(sdp->sd_master_dir, "rindex"); + sdp->sd_rindex = gfs2_lookup_simple(master, "rindex"); if (IS_ERR(sdp->sd_rindex)) { error = PTR_ERR(sdp->sd_rindex); fs_err(sdp, "can't get resource index inode: %d\n", error); @@ -552,7 +853,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo) sdp->sd_rindex_uptodate = 0; /* Read in the quota inode */ - sdp->sd_quota_inode = gfs2_lookup_simple(sdp->sd_master_dir, "quota"); + sdp->sd_quota_inode = gfs2_lookup_simple(master, "quota"); if (IS_ERR(sdp->sd_quota_inode)) { error = PTR_ERR(sdp->sd_quota_inode); fs_err(sdp, "can't get quota file inode: %d\n", error); @@ -571,8 +872,6 @@ fail_inum: iput(sdp->sd_inum_inode); fail_journal: init_journal(sdp, UNDO); -fail_master: - iput(sdp->sd_master_dir); fail: return error; } @@ -583,6 +882,7 @@ static int init_per_node(struct gfs2_sbd *sdp, int undo) char buf[30]; int error = 0; struct gfs2_inode *ip; + struct inode *master = sdp->sd_master_dir->d_inode; if (sdp->sd_args.ar_spectator) return 0; @@ -590,7 +890,7 @@ static int init_per_node(struct gfs2_sbd *sdp, int undo) if (undo) goto fail_qc_gh; - pn = gfs2_lookup_simple(sdp->sd_master_dir, "per_node"); + pn = gfs2_lookup_simple(master, "per_node"); if (IS_ERR(pn)) { error = PTR_ERR(pn); fs_err(sdp, "can't find per_node directory: %d\n", error); @@ -828,7 +1128,7 @@ static int fill_super(struct super_block *sb, void *data, int silent) if (error) goto fail_lm; - error = init_sb(sdp, silent, DO); + error = init_sb(sdp, silent); if (error) goto fail_locking; @@ -869,7 +1169,11 @@ fail_per_node: fail_inodes: init_inodes(sdp, UNDO); fail_sb: - init_sb(sdp, 0, UNDO); + if (sdp->sd_root_dir) + dput(sdp->sd_root_dir); + if (sdp->sd_master_dir) + dput(sdp->sd_master_dir); + sb->s_root = NULL; fail_locking: init_locking(sdp, &mount_gh, UNDO); fail_lm: @@ -887,151 +1191,60 @@ fail: } static int gfs2_get_sb(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data, struct vfsmount *mnt) + const char *dev_name, void *data, struct vfsmount *mnt) { - struct super_block *sb; - struct gfs2_sbd *sdp; - int error = get_sb_bdev(fs_type, flags, dev_name, data, fill_super, mnt); - if (error) - goto out; - sb = mnt->mnt_sb; - sdp = sb->s_fs_info; - sdp->sd_gfs2mnt = mnt; -out: - return error; + return get_sb_bdev(fs_type, flags, dev_name, data, fill_super, mnt); } -static int fill_super_meta(struct super_block *sb, struct super_block *new, - void *data, int silent) +static struct super_block *get_gfs2_sb(const char *dev_name) { - struct gfs2_sbd *sdp = sb->s_fs_info; - struct inode *inode; - int error = 0; - - new->s_fs_info = sdp; - sdp->sd_vfs_meta = sb; - - init_vfs(new, SDF_NOATIME); - - /* Get the master inode */ - inode = igrab(sdp->sd_master_dir); - - new->s_root = d_alloc_root(inode); - if (!new->s_root) { - fs_err(sdp, "can't get root dentry\n"); - error = -ENOMEM; - iput(inode); - } else - new->s_root->d_op = &gfs2_dops; - - return error; -} - -static int set_bdev_super(struct super_block *s, void *data) -{ - s->s_bdev = data; - s->s_dev = s->s_bdev->bd_dev; - return 0; -} - -static int test_bdev_super(struct super_block *s, void *data) -{ - return s->s_bdev == data; -} - -static struct super_block* get_gfs2_sb(const char *dev_name) -{ - struct kstat stat; + struct super_block *sb; struct nameidata nd; - struct super_block *sb = NULL, *s; int error; error = path_lookup(dev_name, LOOKUP_FOLLOW, &nd); if (error) { - printk(KERN_WARNING "GFS2: path_lookup on %s returned error\n", - dev_name); - goto out; - } - error = vfs_getattr(nd.path.mnt, nd.path.dentry, &stat); - - list_for_each_entry(s, &gfs2_fs_type.fs_supers, s_instances) { - if ((S_ISBLK(stat.mode) && s->s_dev == stat.rdev) || - (S_ISDIR(stat.mode) && - s == nd.path.dentry->d_inode->i_sb)) { - sb = s; - goto free_nd; - } + printk(KERN_WARNING "GFS2: path_lookup on %s returned error %d\n", + dev_name, error); + return NULL; } - - printk(KERN_WARNING "GFS2: Unrecognized block device or " - "mount point %s\n", dev_name); - -free_nd: + sb = nd.path.dentry->d_inode->i_sb; + if (sb && (sb->s_type == &gfs2_fs_type)) + atomic_inc(&sb->s_active); + else + sb = NULL; path_put(&nd.path); -out: return sb; } static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - int error = 0; - struct super_block *sb = NULL, *new; + struct super_block *sb = NULL; struct gfs2_sbd *sdp; sb = get_gfs2_sb(dev_name); if (!sb) { printk(KERN_WARNING "GFS2: gfs2 mount does not exist\n"); - error = -ENOENT; - goto error; + return -ENOENT; } sdp = sb->s_fs_info; - if (sdp->sd_vfs_meta) { - printk(KERN_WARNING "GFS2: gfs2meta mount already exists\n"); - error = -EBUSY; - goto error; - } - down(&sb->s_bdev->bd_mount_sem); - new = sget(fs_type, test_bdev_super, set_bdev_super, sb->s_bdev); - up(&sb->s_bdev->bd_mount_sem); - if (IS_ERR(new)) { - error = PTR_ERR(new); - goto error; - } - new->s_flags = flags; - strlcpy(new->s_id, sb->s_id, sizeof(new->s_id)); - sb_set_blocksize(new, sb->s_blocksize); - error = fill_super_meta(sb, new, data, flags & MS_SILENT ? 1 : 0); - if (error) { - up_write(&new->s_umount); - deactivate_super(new); - goto error; - } - - new->s_flags |= MS_ACTIVE; - - /* Grab a reference to the gfs2 mount point */ - atomic_inc(&sdp->sd_gfs2mnt->mnt_count); - return simple_set_mnt(mnt, new); -error: - return error; + mnt->mnt_sb = sb; + mnt->mnt_root = dget(sdp->sd_master_dir); + return 0; } static void gfs2_kill_sb(struct super_block *sb) { - if (sb->s_fs_info) { - gfs2_delete_debugfs_file(sb->s_fs_info); - gfs2_meta_syncfs(sb->s_fs_info); - } - kill_block_super(sb); -} - -static void gfs2_kill_sb_meta(struct super_block *sb) -{ struct gfs2_sbd *sdp = sb->s_fs_info; - generic_shutdown_super(sb); - sdp->sd_vfs_meta = NULL; - atomic_dec(&sdp->sd_gfs2mnt->mnt_count); + gfs2_meta_syncfs(sdp); + dput(sdp->sd_root_dir); + dput(sdp->sd_master_dir); + sdp->sd_root_dir = NULL; + sdp->sd_master_dir = NULL; + shrink_dcache_sb(sb); + kill_block_super(sb); + gfs2_delete_debugfs_file(sdp); } struct file_system_type gfs2_fs_type = { @@ -1046,7 +1259,6 @@ struct file_system_type gfs2meta_fs_type = { .name = "gfs2meta", .fs_flags = FS_REQUIRES_DEV, .get_sb = gfs2_get_sb_meta, - .kill_sb = gfs2_kill_sb_meta, .owner = THIS_MODULE, }; diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c index f66ea0f..8f332d2 100644 --- a/fs/gfs2/ops_super.c +++ b/fs/gfs2/ops_super.c @@ -63,6 +63,39 @@ static int gfs2_write_inode(struct inode *inode, int sync) } /** + * gfs2_make_fs_ro - Turn a Read-Write FS into a Read-Only one + * @sdp: the filesystem + * + * Returns: errno + */ + +static int gfs2_make_fs_ro(struct gfs2_sbd *sdp) +{ + struct gfs2_holder t_gh; + int error; + + gfs2_quota_sync(sdp); + gfs2_statfs_sync(sdp); + + error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE, + &t_gh); + if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) + return error; + + gfs2_meta_syncfs(sdp); + gfs2_log_shutdown(sdp); + + clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); + + if (t_gh.gh_gl) + gfs2_glock_dq_uninit(&t_gh); + + gfs2_quota_cleanup(sdp); + + return error; +} + +/** * gfs2_put_super - Unmount the filesystem * @sb: The VFS superblock * @@ -73,12 +106,6 @@ static void gfs2_put_super(struct super_block *sb) struct gfs2_sbd *sdp = sb->s_fs_info; int error; - if (!sdp) - return; - - if (!strncmp(sb->s_type->name, "gfs2meta", 8)) - return; /* Nothing to do */ - /* Unfreeze the filesystem, if we need to */ mutex_lock(&sdp->sd_freeze_lock); @@ -101,7 +128,6 @@ static void gfs2_put_super(struct super_block *sb) /* Release stuff */ - iput(sdp->sd_master_dir); iput(sdp->sd_jindex); iput(sdp->sd_inum_inode); iput(sdp->sd_statfs_inode); @@ -152,6 +178,7 @@ static void gfs2_write_super(struct super_block *sb) * * Flushes the log to disk. */ + static int gfs2_sync_fs(struct super_block *sb, int wait) { sb->s_dirt = 0; @@ -295,6 +322,7 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) * inode's blocks, or alternatively pass the baton on to another * node for later deallocation. */ + static void gfs2_drop_inode(struct inode *inode) { struct gfs2_inode *ip = GFS2_I(inode); @@ -333,6 +361,16 @@ static void gfs2_clear_inode(struct inode *inode) } } +static int is_ancestor(const struct dentry *d1, const struct dentry *d2) +{ + do { + if (d1 == d2) + return 1; + d1 = d1->d_parent; + } while (!IS_ROOT(d1)); + return 0; +} + /** * gfs2_show_options - Show mount options for /proc/mounts * @s: seq_file structure @@ -346,6 +384,8 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt) struct gfs2_sbd *sdp = mnt->mnt_sb->s_fs_info; struct gfs2_args *args = &sdp->sd_args; + if (is_ancestor(mnt->mnt_root, sdp->sd_master_dir)) + seq_printf(s, ",meta"); if (args->ar_lockproto[0]) seq_printf(s, ",lockproto=%s", args->ar_lockproto); if (args->ar_locktable[0]) @@ -414,6 +454,7 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt) * conversion on the iopen lock, but we can change that later. This * is safe, just less efficient. */ + static void gfs2_delete_inode(struct inode *inode) { struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; @@ -478,8 +519,6 @@ out: clear_inode(inode); } - - static struct inode *gfs2_alloc_inode(struct super_block *sb) { struct gfs2_inode *ip; diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index ca83199..c3ba3d9 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -33,313 +33,6 @@ #include "trans.h" #include "util.h" -static const u32 gfs2_old_fs_formats[] = { - 0 -}; - -static const u32 gfs2_old_multihost_formats[] = { - 0 -}; - -/** - * gfs2_tune_init - Fill a gfs2_tune structure with default values - * @gt: tune - * - */ - -void gfs2_tune_init(struct gfs2_tune *gt) -{ - spin_lock_init(>->gt_spin); - - gt->gt_demote_secs = 300; - gt->gt_incore_log_blocks = 1024; - gt->gt_log_flush_secs = 60; - gt->gt_recoverd_secs = 60; - gt->gt_logd_secs = 1; - gt->gt_quotad_secs = 5; - gt->gt_quota_simul_sync = 64; - gt->gt_quota_warn_period = 10; - gt->gt_quota_scale_num = 1; - gt->gt_quota_scale_den = 1; - gt->gt_quota_cache_secs = 300; - gt->gt_quota_quantum = 60; - gt->gt_atime_quantum = 3600; - gt->gt_new_files_jdata = 0; - gt->gt_max_readahead = 1 << 18; - gt->gt_stall_secs = 600; - gt->gt_complain_secs = 10; - gt->gt_statfs_quantum = 30; - gt->gt_statfs_slow = 0; -} - -/** - * gfs2_check_sb - Check superblock - * @sdp: the filesystem - * @sb: The superblock - * @silent: Don't print a message if the check fails - * - * Checks the version code of the FS is one that we understand how to - * read and that the sizes of the various on-disk structures have not - * changed. - */ - -int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent) -{ - unsigned int x; - - if (sb->sb_magic != GFS2_MAGIC || - sb->sb_type != GFS2_METATYPE_SB) { - if (!silent) - printk(KERN_WARNING "GFS2: not a GFS2 filesystem\n"); - return -EINVAL; - } - - /* If format numbers match exactly, we're done. */ - - if (sb->sb_fs_format == GFS2_FORMAT_FS && - sb->sb_multihost_format == GFS2_FORMAT_MULTI) - return 0; - - if (sb->sb_fs_format != GFS2_FORMAT_FS) { - for (x = 0; gfs2_old_fs_formats[x]; x++) - if (gfs2_old_fs_formats[x] == sb->sb_fs_format) - break; - - if (!gfs2_old_fs_formats[x]) { - printk(KERN_WARNING - "GFS2: code version (%u, %u) is incompatible " - "with ondisk format (%u, %u)\n", - GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, - sb->sb_fs_format, sb->sb_multihost_format); - printk(KERN_WARNING - "GFS2: I don't know how to upgrade this FS\n"); - return -EINVAL; - } - } - - if (sb->sb_multihost_format != GFS2_FORMAT_MULTI) { - for (x = 0; gfs2_old_multihost_formats[x]; x++) - if (gfs2_old_multihost_formats[x] == - sb->sb_multihost_format) - break; - - if (!gfs2_old_multihost_formats[x]) { - printk(KERN_WARNING - "GFS2: code version (%u, %u) is incompatible " - "with ondisk format (%u, %u)\n", - GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, - sb->sb_fs_format, sb->sb_multihost_format); - printk(KERN_WARNING - "GFS2: I don't know how to upgrade this FS\n"); - return -EINVAL; - } - } - - if (!sdp->sd_args.ar_upgrade) { - printk(KERN_WARNING - "GFS2: code version (%u, %u) is incompatible " - "with ondisk format (%u, %u)\n", - GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, - sb->sb_fs_format, sb->sb_multihost_format); - printk(KERN_INFO - "GFS2: Use the \"upgrade\" mount option to upgrade " - "the FS\n"); - printk(KERN_INFO "GFS2: See the manual for more details\n"); - return -EINVAL; - } - - return 0; -} - - -static void end_bio_io_page(struct bio *bio, int error) -{ - struct page *page = bio->bi_private; - - if (!error) - SetPageUptodate(page); - else - printk(KERN_WARNING "gfs2: error %d reading superblock\n", error); - unlock_page(page); -} - -static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf) -{ - const struct gfs2_sb *str = buf; - - sb->sb_magic = be32_to_cpu(str->sb_header.mh_magic); - sb->sb_type = be32_to_cpu(str->sb_header.mh_type); - sb->sb_format = be32_to_cpu(str->sb_header.mh_format); - sb->sb_fs_format = be32_to_cpu(str->sb_fs_format); - sb->sb_multihost_format = be32_to_cpu(str->sb_multihost_format); - sb->sb_bsize = be32_to_cpu(str->sb_bsize); - sb->sb_bsize_shift = be32_to_cpu(str->sb_bsize_shift); - sb->sb_master_dir.no_addr = be64_to_cpu(str->sb_master_dir.no_addr); - sb->sb_master_dir.no_formal_ino = be64_to_cpu(str->sb_master_dir.no_formal_ino); - sb->sb_root_dir.no_addr = be64_to_cpu(str->sb_root_dir.no_addr); - sb->sb_root_dir.no_formal_ino = be64_to_cpu(str->sb_root_dir.no_formal_ino); - - memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN); - memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN); -} - -/** - * gfs2_read_super - Read the gfs2 super block from disk - * @sdp: The GFS2 super block - * @sector: The location of the super block - * @error: The error code to return - * - * This uses the bio functions to read the super block from disk - * because we want to be 100% sure that we never read cached data. - * A super block is read twice only during each GFS2 mount and is - * never written to by the filesystem. The first time its read no - * locks are held, and the only details which are looked at are those - * relating to the locking protocol. Once locking is up and working, - * the sb is read again under the lock to establish the location of - * the master directory (contains pointers to journals etc) and the - * root directory. - * - * Returns: 0 on success or error - */ - -int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector) -{ - struct super_block *sb = sdp->sd_vfs; - struct gfs2_sb *p; - struct page *page; - struct bio *bio; - - page = alloc_page(GFP_NOFS); - if (unlikely(!page)) - return -ENOBUFS; - - ClearPageUptodate(page); - ClearPageDirty(page); - lock_page(page); - - bio = bio_alloc(GFP_NOFS, 1); - if (unlikely(!bio)) { - __free_page(page); - return -ENOBUFS; - } - - bio->bi_sector = sector * (sb->s_blocksize >> 9); - bio->bi_bdev = sb->s_bdev; - bio_add_page(bio, page, PAGE_SIZE, 0); - - bio->bi_end_io = end_bio_io_page; - bio->bi_private = page; - submit_bio(READ_SYNC | (1 << BIO_RW_META), bio); - wait_on_page_locked(page); - bio_put(bio); - if (!PageUptodate(page)) { - __free_page(page); - return -EIO; - } - p = kmap(page); - gfs2_sb_in(&sdp->sd_sb, p); - kunmap(page); - __free_page(page); - return 0; -} - -/** - * gfs2_read_sb - Read super block - * @sdp: The GFS2 superblock - * @gl: the glock for the superblock (assumed to be held) - * @silent: Don't print message if mount fails - * - */ - -int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent) -{ - u32 hash_blocks, ind_blocks, leaf_blocks; - u32 tmp_blocks; - unsigned int x; - int error; - - error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift); - if (error) { - if (!silent) - fs_err(sdp, "can't read superblock\n"); - return error; - } - - error = gfs2_check_sb(sdp, &sdp->sd_sb, silent); - if (error) - return error; - - sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - - GFS2_BASIC_BLOCK_SHIFT; - sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift; - sdp->sd_diptrs = (sdp->sd_sb.sb_bsize - - sizeof(struct gfs2_dinode)) / sizeof(u64); - sdp->sd_inptrs = (sdp->sd_sb.sb_bsize - - sizeof(struct gfs2_meta_header)) / sizeof(u64); - sdp->sd_jbsize = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header); - sdp->sd_hash_bsize = sdp->sd_sb.sb_bsize / 2; - sdp->sd_hash_bsize_shift = sdp->sd_sb.sb_bsize_shift - 1; - sdp->sd_hash_ptrs = sdp->sd_hash_bsize / sizeof(u64); - sdp->sd_qc_per_block = (sdp->sd_sb.sb_bsize - - sizeof(struct gfs2_meta_header)) / - sizeof(struct gfs2_quota_change); - - /* Compute maximum reservation required to add a entry to a directory */ - - hash_blocks = DIV_ROUND_UP(sizeof(u64) * (1 << GFS2_DIR_MAX_DEPTH), - sdp->sd_jbsize); - - ind_blocks = 0; - for (tmp_blocks = hash_blocks; tmp_blocks > sdp->sd_diptrs;) { - tmp_blocks = DIV_ROUND_UP(tmp_blocks, sdp->sd_inptrs); - ind_blocks += tmp_blocks; - } - - leaf_blocks = 2 + GFS2_DIR_MAX_DEPTH; - - sdp->sd_max_dirres = hash_blocks + ind_blocks + leaf_blocks; - - sdp->sd_heightsize[0] = sdp->sd_sb.sb_bsize - - sizeof(struct gfs2_dinode); - sdp->sd_heightsize[1] = sdp->sd_sb.sb_bsize * sdp->sd_diptrs; - for (x = 2;; x++) { - u64 space, d; - u32 m; - - space = sdp->sd_heightsize[x - 1] * sdp->sd_inptrs; - d = space; - m = do_div(d, sdp->sd_inptrs); - - if (d != sdp->sd_heightsize[x - 1] || m) - break; - sdp->sd_heightsize[x] = space; - } - sdp->sd_max_height = x; - sdp->sd_heightsize[x] = ~0; - gfs2_assert(sdp, sdp->sd_max_height <= GFS2_MAX_META_HEIGHT); - - sdp->sd_jheightsize[0] = sdp->sd_sb.sb_bsize - - sizeof(struct gfs2_dinode); - sdp->sd_jheightsize[1] = sdp->sd_jbsize * sdp->sd_diptrs; - for (x = 2;; x++) { - u64 space, d; - u32 m; - - space = sdp->sd_jheightsize[x - 1] * sdp->sd_inptrs; - d = space; - m = do_div(d, sdp->sd_inptrs); - - if (d != sdp->sd_jheightsize[x - 1] || m) - break; - sdp->sd_jheightsize[x] = space; - } - sdp->sd_max_jheight = x; - sdp->sd_jheightsize[x] = ~0; - gfs2_assert(sdp, sdp->sd_max_jheight <= GFS2_MAX_META_HEIGHT); - - return 0; -} - /** * gfs2_jindex_hold - Grab a lock on the jindex * @sdp: The GFS2 superblock @@ -581,39 +274,6 @@ fail: return error; } -/** - * gfs2_make_fs_ro - Turn a Read-Write FS into a Read-Only one - * @sdp: the filesystem - * - * Returns: errno - */ - -int gfs2_make_fs_ro(struct gfs2_sbd *sdp) -{ - struct gfs2_holder t_gh; - int error; - - gfs2_quota_sync(sdp); - gfs2_statfs_sync(sdp); - - error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE, - &t_gh); - if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) - return error; - - gfs2_meta_syncfs(sdp); - gfs2_log_shutdown(sdp); - - clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); - - if (t_gh.gh_gl) - gfs2_glock_dq_uninit(&t_gh); - - gfs2_quota_cleanup(sdp); - - return error; -} - static void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf) { const struct gfs2_statfs_change *str = buf; diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h index 44361ec..50a4c9b 100644 --- a/fs/gfs2/super.h +++ b/fs/gfs2/super.h @@ -12,11 +12,6 @@ #include "incore.h" -void gfs2_tune_init(struct gfs2_tune *gt); - -int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent); -int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent); -int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector); void gfs2_lm_unmount(struct gfs2_sbd *sdp); static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp) @@ -40,7 +35,6 @@ int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename, struct gfs2_inode **ipp); int gfs2_make_fs_rw(struct gfs2_sbd *sdp); -int gfs2_make_fs_ro(struct gfs2_sbd *sdp); int gfs2_statfs_init(struct gfs2_sbd *sdp); void gfs2_statfs_change(struct gfs2_sbd *sdp, -- cgit v0.10.2 From 72dbf4790fc6736f9cb54424245114acf0b0038c Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 12 Aug 2008 13:39:29 -0500 Subject: GFS2: rm on multiple nodes causes panic This patch fixes a problem whereby simultaneous unlink, rmdir, rename and link operations (e.g. rm -fR *) from multiple nodes on the same GFS2 file system can cause kernel panics, hangs, and/or memory corruption. It also gets rid of all the non-rgrp calls to gfs2_glock_nq_m. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index e2c62f7..35f6f03 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -159,9 +159,13 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); - error = gfs2_glock_nq_m(2, ghs); + error = gfs2_glock_nq(ghs); /* parent */ if (error) - goto out; + goto out_parent; + + error = gfs2_glock_nq(ghs + 1); /* child */ + if (error) + goto out_child; error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC); if (error) @@ -245,8 +249,10 @@ out_alloc: if (alloc_required) gfs2_alloc_put(dip); out_gunlock: - gfs2_glock_dq_m(2, ghs); -out: + gfs2_glock_dq(ghs + 1); +out_child: + gfs2_glock_dq(ghs); +out_parent: gfs2_holder_uninit(ghs); gfs2_holder_uninit(ghs + 1); if (!error) { @@ -302,7 +308,7 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry) error = gfs2_unlink_ok(dip, &dentry->d_name, ip); if (error) - goto out_rgrp; + goto out_gunlock; error = gfs2_trans_begin(sdp, 2*RES_DINODE + RES_LEAF + RES_RG_BIT, 0); if (error) @@ -316,6 +322,7 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry) out_end_trans: gfs2_trans_end(sdp); +out_gunlock: gfs2_glock_dq(ghs + 2); out_rgrp: gfs2_holder_uninit(ghs + 2); @@ -485,7 +492,6 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry) struct gfs2_holder ri_gh; int error; - error = gfs2_rindex_hold(sdp, &ri_gh); if (error) return error; @@ -495,9 +501,17 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry) rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr); gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2); - error = gfs2_glock_nq_m(3, ghs); + error = gfs2_glock_nq(ghs); /* parent */ if (error) - goto out; + goto out_parent; + + error = gfs2_glock_nq(ghs + 1); /* child */ + if (error) + goto out_child; + + error = gfs2_glock_nq(ghs + 2); /* rgrp */ + if (error) + goto out_rgrp; error = gfs2_unlink_ok(dip, &dentry->d_name, ip); if (error) @@ -523,11 +537,15 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry) gfs2_trans_end(sdp); out_gunlock: - gfs2_glock_dq_m(3, ghs); -out: - gfs2_holder_uninit(ghs); - gfs2_holder_uninit(ghs + 1); + gfs2_glock_dq(ghs + 2); +out_rgrp: gfs2_holder_uninit(ghs + 2); + gfs2_glock_dq(ghs + 1); +out_child: + gfs2_holder_uninit(ghs + 1); + gfs2_glock_dq(ghs); +out_parent: + gfs2_holder_uninit(ghs); gfs2_glock_dq_uninit(&ri_gh); return error; } @@ -639,9 +657,11 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++); } - error = gfs2_glock_nq_m(num_gh, ghs); - if (error) - goto out_uninit; + for (x = 0; x < num_gh; x++) { + error = gfs2_glock_nq(ghs + x); + if (error) + goto out_gunlock; + } /* Check out the old directory */ @@ -804,10 +824,10 @@ out_alloc: if (alloc_required) gfs2_alloc_put(ndip); out_gunlock: - gfs2_glock_dq_m(num_gh, ghs); -out_uninit: - for (x = 0; x < num_gh; x++) + while (x--) { + gfs2_glock_dq(ghs + x); gfs2_holder_uninit(ghs + x); + } out_gunlock_r: if (dir_rename) gfs2_glock_dq_uninit(&r_gh); -- cgit v0.10.2 From 26d809af6397ce5c37f5c44d89734d19cce1ad25 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 13 Aug 2008 12:46:26 +0200 Subject: x86: fix xsave build error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix this build failure with certain glibc versions: In file included from /usr/include/bits/sigcontext.h:28, from /usr/include/signal.h:333, from Documentation/accounting/getdelays.c:24: /home/mingo/tip/usr/include/asm/sigcontext.h:191: error: expected specifier-qualifier-list before ‘u64’ Signed-off-by: Ingo Molnar diff --git a/include/asm-x86/sigcontext.h b/include/asm-x86/sigcontext.h index 899fe2f..ee813f4 100644 --- a/include/asm-x86/sigcontext.h +++ b/include/asm-x86/sigcontext.h @@ -264,9 +264,9 @@ struct sigcontext { #endif /* !__i386__ */ struct _xsave_hdr { - u64 xstate_bv; - u64 reserved1[2]; - u64 reserved2[5]; + __u64 xstate_bv; + __u64 reserved1[2]; + __u64 reserved2[5]; }; /* -- cgit v0.10.2 From 34a1b9fc4995102ecbb3a980b348aebf168d8196 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 12 Aug 2008 13:25:44 +0100 Subject: Fix date output in x86 microcode driver. The microcode stores its date in a uint32_t in some weird order approximating pdp-endian. Rather than printing it like that, print it properly in ISO standard form. Signed-off-by: David Woodhouse Cc: Shaohua Li Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index a61986e..d2d9d74 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c @@ -339,8 +339,11 @@ static void apply_microcode(int cpu) return; } printk(KERN_INFO "microcode: CPU%d updated from revision " - "0x%x to 0x%x, date = %08x \n", - cpu_num, uci->rev, val[1], uci->mc.mc_intel->hdr.date); + "0x%x to 0x%x, date = %04x-%02x-%02x \n", + cpu_num, uci->rev, val[1], + uci->mc.mc_intel->hdr.date & 0xffff, + uci->mc.mc_intel->hdr.date >> 24, + (uci->mc.mc_intel->hdr.date >> 16) & 0xff); uci->rev = val[1]; } -- cgit v0.10.2 From ee2b92a8201a40021ecd1aee6f0625dc03bacc54 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Wed, 13 Aug 2008 11:38:13 -0700 Subject: x86, xsave: remove the redundant access_ok() in setup_rt_frame() save_i387_xstate() is already doing the required access_ok(). Remove the redundant access_ok() before it. Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index 2621b98..6c58169 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -208,9 +208,6 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, frame = (void __user *)round_down( (unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8; - if (!access_ok(VERIFY_WRITE, fp, sizeof(struct _fpstate))) - goto give_sigsegv; - if (save_i387_xstate(fp) < 0) err |= -1; } else -- cgit v0.10.2 From ed405958057ca6a8c4c9178a7a3b1167fabb45f5 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Wed, 13 Aug 2008 11:38:14 -0700 Subject: x86, xsave: clear the user buffer before doing fxsave/xsave fxsave/xsave instructions will not touch all the bytes in the fxsave/xsave frame. Clear the user buffer before doing fxsave/xsave directly to user buffer during the sigcontext setup. This is essentially needed in the context of xsave(for example, some of the fields in the xsave header are not touched by the xsave and defined as must be zero). This will also present uniform and clean context to the user (from which user can safely do fxrstor/xrstor). Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 7415f3e..bb097b1 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -92,6 +92,12 @@ int save_i387_xstate(void __user *buf) return 0; clear_used_math(); /* trigger finit */ if (task_thread_info(tsk)->status & TS_USEDFPU) { + /* + * Start with clearing the user buffer. This will present a + * clean context for the bytes not touched by the fxsave/xsave. + */ + __clear_user(buf, sig_xstate_size); + if (task_thread_info(tsk)->status & TS_XSAVE) err = xsave_user(buf); else -- cgit v0.10.2 From f65bc214e042916135256620f900e9599d65e0cb Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Wed, 13 Aug 2008 11:38:15 -0700 Subject: x86, xsave: use BUG_ON() instead of BUILD_BUG_ON() All these structure sizes are runtime determined. So use a runtime bug check. Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index bb097b1..07713d6 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -82,8 +82,7 @@ int save_i387_xstate(void __user *buf) if (!access_ok(VERIFY_WRITE, buf, sig_xstate_size)) return -EACCES; - BUILD_BUG_ON(sizeof(struct user_i387_struct) != - sizeof(tsk->thread.xstate->fxsave)); + BUG_ON(sig_xstate_size < xstate_size); if ((unsigned long)buf % 64) printk("save_i387_xstate: bad fpstate %p\n", buf); -- cgit v0.10.2 From dbc74c65b3fd841985935f676388c82d6b85c485 Mon Sep 17 00:00:00 2001 From: Vesa-Matti Kari Date: Thu, 7 Aug 2008 03:18:20 +0300 Subject: selinux: Unify for- and while-loop style Replace "thing != NULL" comparisons with just "thing" to make the code look more uniform (mixed styles were used even in the same source file). Signed-off-by: Vesa-Matti Kari Acked-by: Stephen Smalley Signed-off-by: James Morris diff --git a/security/selinux/ss/avtab.c b/security/selinux/ss/avtab.c index e8ae812..1215b8e 100644 --- a/security/selinux/ss/avtab.c +++ b/security/selinux/ss/avtab.c @@ -229,7 +229,7 @@ void avtab_destroy(struct avtab *h) for (i = 0; i < h->nslot; i++) { cur = h->htable[i]; - while (cur != NULL) { + while (cur) { temp = cur; cur = cur->next; kmem_cache_free(avtab_node_cachep, temp); diff --git a/security/selinux/ss/conditional.c b/security/selinux/ss/conditional.c index f8c850a..4a4e35c 100644 --- a/security/selinux/ss/conditional.c +++ b/security/selinux/ss/conditional.c @@ -29,7 +29,7 @@ static int cond_evaluate_expr(struct policydb *p, struct cond_expr *expr) int s[COND_EXPR_MAXDEPTH]; int sp = -1; - for (cur = expr; cur != NULL; cur = cur->next) { + for (cur = expr; cur; cur = cur->next) { switch (cur->expr_type) { case COND_BOOL: if (sp == (COND_EXPR_MAXDEPTH - 1)) @@ -97,14 +97,14 @@ int evaluate_cond_node(struct policydb *p, struct cond_node *node) if (new_state == -1) printk(KERN_ERR "SELinux: expression result was undefined - disabling all rules.\n"); /* turn the rules on or off */ - for (cur = node->true_list; cur != NULL; cur = cur->next) { + for (cur = node->true_list; cur; cur = cur->next) { if (new_state <= 0) cur->node->key.specified &= ~AVTAB_ENABLED; else cur->node->key.specified |= AVTAB_ENABLED; } - for (cur = node->false_list; cur != NULL; cur = cur->next) { + for (cur = node->false_list; cur; cur = cur->next) { /* -1 or 1 */ if (new_state) cur->node->key.specified &= ~AVTAB_ENABLED; @@ -128,7 +128,7 @@ int cond_policydb_init(struct policydb *p) static void cond_av_list_destroy(struct cond_av_list *list) { struct cond_av_list *cur, *next; - for (cur = list; cur != NULL; cur = next) { + for (cur = list; cur; cur = next) { next = cur->next; /* the avtab_ptr_t node is destroy by the avtab */ kfree(cur); @@ -139,7 +139,7 @@ static void cond_node_destroy(struct cond_node *node) { struct cond_expr *cur_expr, *next_expr; - for (cur_expr = node->expr; cur_expr != NULL; cur_expr = next_expr) { + for (cur_expr = node->expr; cur_expr; cur_expr = next_expr) { next_expr = cur_expr->next; kfree(cur_expr); } @@ -155,7 +155,7 @@ static void cond_list_destroy(struct cond_node *list) if (list == NULL) return; - for (cur = list; cur != NULL; cur = next) { + for (cur = list; cur; cur = next) { next = cur->next; cond_node_destroy(cur); } @@ -291,7 +291,7 @@ static int cond_insertf(struct avtab *a, struct avtab_key *k, struct avtab_datum goto err; } found = 0; - for (cur = other; cur != NULL; cur = cur->next) { + for (cur = other; cur; cur = cur->next) { if (cur->node == node_ptr) { found = 1; break; @@ -485,7 +485,7 @@ void cond_compute_av(struct avtab *ctab, struct avtab_key *key, struct av_decisi if (!ctab || !key || !avd) return; - for (node = avtab_search_node(ctab, key); node != NULL; + for (node = avtab_search_node(ctab, key); node; node = avtab_search_node_next(node, key->specified)) { if ((u16)(AVTAB_ALLOWED|AVTAB_ENABLED) == (node->key.specified & (AVTAB_ALLOWED|AVTAB_ENABLED))) diff --git a/security/selinux/ss/ebitmap.c b/security/selinux/ss/ebitmap.c index ddc2754..68c7348 100644 --- a/security/selinux/ss/ebitmap.c +++ b/security/selinux/ss/ebitmap.c @@ -109,7 +109,7 @@ int ebitmap_netlbl_export(struct ebitmap *ebmap, *catmap = c_iter; c_iter->startbit = e_iter->startbit & ~(NETLBL_CATMAP_SIZE - 1); - while (e_iter != NULL) { + while (e_iter) { for (i = 0; i < EBITMAP_UNIT_NUMS; i++) { unsigned int delta, e_startbit, c_endbit; @@ -197,7 +197,7 @@ int ebitmap_netlbl_import(struct ebitmap *ebmap, } } c_iter = c_iter->next; - } while (c_iter != NULL); + } while (c_iter); if (e_iter != NULL) ebmap->highbit = e_iter->startbit + EBITMAP_SIZE; else diff --git a/security/selinux/ss/hashtab.c b/security/selinux/ss/hashtab.c index 2e7788e..933e735 100644 --- a/security/selinux/ss/hashtab.c +++ b/security/selinux/ss/hashtab.c @@ -81,7 +81,7 @@ void *hashtab_search(struct hashtab *h, const void *key) hvalue = h->hash_value(h, key); cur = h->htable[hvalue]; - while (cur != NULL && h->keycmp(h, key, cur->key) > 0) + while (cur && h->keycmp(h, key, cur->key) > 0) cur = cur->next; if (cur == NULL || (h->keycmp(h, key, cur->key) != 0)) @@ -100,7 +100,7 @@ void hashtab_destroy(struct hashtab *h) for (i = 0; i < h->size; i++) { cur = h->htable[i]; - while (cur != NULL) { + while (cur) { temp = cur; cur = cur->next; kfree(temp); @@ -127,7 +127,7 @@ int hashtab_map(struct hashtab *h, for (i = 0; i < h->size; i++) { cur = h->htable[i]; - while (cur != NULL) { + while (cur) { ret = apply(cur->key, cur->datum, args); if (ret) return ret; diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index b52f923..5a0536b 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -356,7 +356,7 @@ static int context_struct_compute_av(struct context *scontext, avkey.source_type = i + 1; avkey.target_type = j + 1; for (node = avtab_search_node(&policydb.te_avtab, &avkey); - node != NULL; + node; node = avtab_search_node_next(node, avkey.specified)) { if (node->key.specified == AVTAB_ALLOWED) avd->allowed |= node->datum.data; @@ -1037,7 +1037,7 @@ static int security_compute_sid(u32 ssid, /* If no permanent rule, also check for enabled conditional rules */ if (!avdatum) { node = avtab_search_node(&policydb.te_cond_avtab, &avkey); - for (; node != NULL; node = avtab_search_node_next(node, specified)) { + for (; node; node = avtab_search_node_next(node, specified)) { if (node->key.specified & AVTAB_ENABLED) { avdatum = &node->datum; break; @@ -2050,7 +2050,7 @@ int security_set_bools(int len, int *values) policydb.bool_val_to_struct[i]->state = 0; } - for (cur = policydb.cond_list; cur != NULL; cur = cur->next) { + for (cur = policydb.cond_list; cur; cur = cur->next) { rc = evaluate_cond_node(&policydb, cur); if (rc) goto out; @@ -2102,7 +2102,7 @@ static int security_preserve_bools(struct policydb *p) if (booldatum) booldatum->state = bvalues[i]; } - for (cur = p->cond_list; cur != NULL; cur = cur->next) { + for (cur = p->cond_list; cur; cur = cur->next) { rc = evaluate_cond_node(p, cur); if (rc) goto out; diff --git a/security/selinux/ss/sidtab.c b/security/selinux/ss/sidtab.c index a81ded1..e817989 100644 --- a/security/selinux/ss/sidtab.c +++ b/security/selinux/ss/sidtab.c @@ -43,7 +43,7 @@ int sidtab_insert(struct sidtab *s, u32 sid, struct context *context) hvalue = SIDTAB_HASH(sid); prev = NULL; cur = s->htable[hvalue]; - while (cur != NULL && sid > cur->sid) { + while (cur && sid > cur->sid) { prev = cur; cur = cur->next; } @@ -92,7 +92,7 @@ static struct context *sidtab_search_core(struct sidtab *s, u32 sid, int force) hvalue = SIDTAB_HASH(sid); cur = s->htable[hvalue]; - while (cur != NULL && sid > cur->sid) + while (cur && sid > cur->sid) cur = cur->next; if (force && cur && sid == cur->sid && cur->context.len) @@ -103,7 +103,7 @@ static struct context *sidtab_search_core(struct sidtab *s, u32 sid, int force) sid = SECINITSID_UNLABELED; hvalue = SIDTAB_HASH(sid); cur = s->htable[hvalue]; - while (cur != NULL && sid > cur->sid) + while (cur && sid > cur->sid) cur = cur->next; if (!cur || sid != cur->sid) return NULL; @@ -136,7 +136,7 @@ int sidtab_map(struct sidtab *s, for (i = 0; i < SIDTAB_SIZE; i++) { cur = s->htable[i]; - while (cur != NULL) { + while (cur) { rc = apply(cur->sid, &cur->context, args); if (rc) goto out; @@ -155,7 +155,7 @@ static inline u32 sidtab_search_context(struct sidtab *s, for (i = 0; i < SIDTAB_SIZE; i++) { cur = s->htable[i]; - while (cur != NULL) { + while (cur) { if (context_cmp(&cur->context, context)) return cur->sid; cur = cur->next; @@ -242,7 +242,7 @@ void sidtab_destroy(struct sidtab *s) for (i = 0; i < SIDTAB_SIZE; i++) { cur = s->htable[i]; - while (cur != NULL) { + while (cur) { temp = cur; cur = cur->next; context_destroy(&temp->context); -- cgit v0.10.2 From c1bc667e844c2677cdf927102ab384fe7b033762 Mon Sep 17 00:00:00 2001 From: Julius Volz Date: Thu, 7 Aug 2008 16:43:38 +0200 Subject: IPVS: Add genetlink interface definitions to ip_vs.h Add IPVS Generic Netlink interface definitions to include/linux/ip_vs.h. Signed-off-by: Julius Volz Signed-off-by: Simon Horman diff --git a/include/linux/ip_vs.h b/include/linux/ip_vs.h index ec6eb49..0f434a2 100644 --- a/include/linux/ip_vs.h +++ b/include/linux/ip_vs.h @@ -242,4 +242,164 @@ struct ip_vs_daemon_user { int syncid; }; +/* + * + * IPVS Generic Netlink interface definitions + * + */ + +/* Generic Netlink family info */ + +#define IPVS_GENL_NAME "IPVS" +#define IPVS_GENL_VERSION 0x1 + +struct ip_vs_flags { + __be32 flags; + __be32 mask; +}; + +/* Generic Netlink command attributes */ +enum { + IPVS_CMD_UNSPEC = 0, + + IPVS_CMD_NEW_SERVICE, /* add service */ + IPVS_CMD_SET_SERVICE, /* modify service */ + IPVS_CMD_DEL_SERVICE, /* delete service */ + IPVS_CMD_GET_SERVICE, /* get service info */ + + IPVS_CMD_NEW_DEST, /* add destination */ + IPVS_CMD_SET_DEST, /* modify destination */ + IPVS_CMD_DEL_DEST, /* delete destination */ + IPVS_CMD_GET_DEST, /* get destination info */ + + IPVS_CMD_NEW_DAEMON, /* start sync daemon */ + IPVS_CMD_DEL_DAEMON, /* stop sync daemon */ + IPVS_CMD_GET_DAEMON, /* get sync daemon status */ + + IPVS_CMD_SET_CONFIG, /* set config settings */ + IPVS_CMD_GET_CONFIG, /* get config settings */ + + IPVS_CMD_SET_INFO, /* only used in GET_INFO reply */ + IPVS_CMD_GET_INFO, /* get general IPVS info */ + + IPVS_CMD_ZERO, /* zero all counters and stats */ + IPVS_CMD_FLUSH, /* flush services and dests */ + + __IPVS_CMD_MAX, +}; + +#define IPVS_CMD_MAX (__IPVS_CMD_MAX - 1) + +/* Attributes used in the first level of commands */ +enum { + IPVS_CMD_ATTR_UNSPEC = 0, + IPVS_CMD_ATTR_SERVICE, /* nested service attribute */ + IPVS_CMD_ATTR_DEST, /* nested destination attribute */ + IPVS_CMD_ATTR_DAEMON, /* nested sync daemon attribute */ + IPVS_CMD_ATTR_TIMEOUT_TCP, /* TCP connection timeout */ + IPVS_CMD_ATTR_TIMEOUT_TCP_FIN, /* TCP FIN wait timeout */ + IPVS_CMD_ATTR_TIMEOUT_UDP, /* UDP timeout */ + __IPVS_CMD_ATTR_MAX, +}; + +#define IPVS_CMD_ATTR_MAX (__IPVS_SVC_ATTR_MAX - 1) + +/* + * Attributes used to describe a service + * + * Used inside nested attribute IPVS_CMD_ATTR_SERVICE + */ +enum { + IPVS_SVC_ATTR_UNSPEC = 0, + IPVS_SVC_ATTR_AF, /* address family */ + IPVS_SVC_ATTR_PROTOCOL, /* virtual service protocol */ + IPVS_SVC_ATTR_ADDR, /* virtual service address */ + IPVS_SVC_ATTR_PORT, /* virtual service port */ + IPVS_SVC_ATTR_FWMARK, /* firewall mark of service */ + + IPVS_SVC_ATTR_SCHED_NAME, /* name of scheduler */ + IPVS_SVC_ATTR_FLAGS, /* virtual service flags */ + IPVS_SVC_ATTR_TIMEOUT, /* persistent timeout */ + IPVS_SVC_ATTR_NETMASK, /* persistent netmask */ + + IPVS_SVC_ATTR_STATS, /* nested attribute for service stats */ + __IPVS_SVC_ATTR_MAX, +}; + +#define IPVS_SVC_ATTR_MAX (__IPVS_SVC_ATTR_MAX - 1) + +/* + * Attributes used to describe a destination (real server) + * + * Used inside nested attribute IPVS_CMD_ATTR_DEST + */ +enum { + IPVS_DEST_ATTR_UNSPEC = 0, + IPVS_DEST_ATTR_ADDR, /* real server address */ + IPVS_DEST_ATTR_PORT, /* real server port */ + + IPVS_DEST_ATTR_FWD_METHOD, /* forwarding method */ + IPVS_DEST_ATTR_WEIGHT, /* destination weight */ + + IPVS_DEST_ATTR_U_THRESH, /* upper threshold */ + IPVS_DEST_ATTR_L_THRESH, /* lower threshold */ + + IPVS_DEST_ATTR_ACTIVE_CONNS, /* active connections */ + IPVS_DEST_ATTR_INACT_CONNS, /* inactive connections */ + IPVS_DEST_ATTR_PERSIST_CONNS, /* persistent connections */ + + IPVS_DEST_ATTR_STATS, /* nested attribute for dest stats */ + __IPVS_DEST_ATTR_MAX, +}; + +#define IPVS_DEST_ATTR_MAX (__IPVS_DEST_ATTR_MAX - 1) + +/* + * Attributes describing a sync daemon + * + * Used inside nested attribute IPVS_CMD_ATTR_DAEMON + */ +enum { + IPVS_DAEMON_ATTR_UNSPEC = 0, + IPVS_DAEMON_ATTR_STATE, /* sync daemon state (master/backup) */ + IPVS_DAEMON_ATTR_MCAST_IFN, /* multicast interface name */ + IPVS_DAEMON_ATTR_SYNC_ID, /* SyncID we belong to */ + __IPVS_DAEMON_ATTR_MAX, +}; + +#define IPVS_DAEMON_ATTR_MAX (__IPVS_DAEMON_ATTR_MAX - 1) + +/* + * Attributes used to describe service or destination entry statistics + * + * Used inside nested attributes IPVS_SVC_ATTR_STATS and IPVS_DEST_ATTR_STATS + */ +enum { + IPVS_STATS_ATTR_UNSPEC = 0, + IPVS_STATS_ATTR_CONNS, /* connections scheduled */ + IPVS_STATS_ATTR_INPKTS, /* incoming packets */ + IPVS_STATS_ATTR_OUTPKTS, /* outgoing packets */ + IPVS_STATS_ATTR_INBYTES, /* incoming bytes */ + IPVS_STATS_ATTR_OUTBYTES, /* outgoing bytes */ + + IPVS_STATS_ATTR_CPS, /* current connection rate */ + IPVS_STATS_ATTR_INPPS, /* current in packet rate */ + IPVS_STATS_ATTR_OUTPPS, /* current out packet rate */ + IPVS_STATS_ATTR_INBPS, /* current in byte rate */ + IPVS_STATS_ATTR_OUTBPS, /* current out byte rate */ + __IPVS_STATS_ATTR_MAX, +}; + +#define IPVS_STATS_ATTR_MAX (__IPVS_STATS_ATTR_MAX - 1) + +/* Attributes used in response to IPVS_CMD_GET_INFO command */ +enum { + IPVS_INFO_ATTR_UNSPEC = 0, + IPVS_INFO_ATTR_VERSION, /* IPVS version number */ + IPVS_INFO_ATTR_CONN_TAB_SIZE, /* size of connection hash table */ + __IPVS_INFO_ATTR_MAX, +}; + +#define IPVS_INFO_ATTR_MAX (__IPVS_INFO_ATTR_MAX - 1) + #endif /* _IP_VS_H */ -- cgit v0.10.2 From 9a812198ae49967f239789164c55ec3e72b7e0dd Mon Sep 17 00:00:00 2001 From: Julius Volz Date: Thu, 14 Aug 2008 14:08:44 +0200 Subject: IPVS: Add genetlink interface implementation Add the implementation of the new Generic Netlink interface to IPVS and keep the old set/getsockopt interface for userspace backwards compatibility. Signed-off-by: Julius Volz Acked-by: Sven Wegener Signed-off-by: Simon Horman diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c index 6379705..d1dbd8b 100644 --- a/net/ipv4/ipvs/ip_vs_ctl.c +++ b/net/ipv4/ipvs/ip_vs_ctl.c @@ -37,6 +37,7 @@ #include #include #include +#include #include @@ -2320,6 +2321,872 @@ static struct nf_sockopt_ops ip_vs_sockopts = { .owner = THIS_MODULE, }; +/* + * Generic Netlink interface + */ + +/* IPVS genetlink family */ +static struct genl_family ip_vs_genl_family = { + .id = GENL_ID_GENERATE, + .hdrsize = 0, + .name = IPVS_GENL_NAME, + .version = IPVS_GENL_VERSION, + .maxattr = IPVS_CMD_MAX, +}; + +/* Policy used for first-level command attributes */ +static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = { + [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED }, + [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED }, + [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED }, + [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 }, + [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 }, + [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 }, +}; + +/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */ +static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = { + [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 }, + [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING, + .len = IP_VS_IFNAME_MAXLEN }, + [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 }, +}; + +/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */ +static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = { + [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 }, + [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 }, + [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY, + .len = sizeof(union nf_inet_addr) }, + [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 }, + [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 }, + [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING, + .len = IP_VS_SCHEDNAME_MAXLEN }, + [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY, + .len = sizeof(struct ip_vs_flags) }, + [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 }, + [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED }, +}; + +/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */ +static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = { + [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY, + .len = sizeof(union nf_inet_addr) }, + [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 }, + [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 }, + [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 }, + [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 }, + [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 }, + [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 }, + [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 }, + [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 }, + [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED }, +}; + +static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type, + struct ip_vs_stats *stats) +{ + struct nlattr *nl_stats = nla_nest_start(skb, container_type); + if (!nl_stats) + return -EMSGSIZE; + + spin_lock_bh(&stats->lock); + + NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->conns); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->inpkts); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->outpkts); + NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->inbytes); + NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->outbytes); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->cps); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->inpps); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->outpps); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->inbps); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->outbps); + + spin_unlock_bh(&stats->lock); + + nla_nest_end(skb, nl_stats); + + return 0; + +nla_put_failure: + spin_unlock_bh(&stats->lock); + nla_nest_cancel(skb, nl_stats); + return -EMSGSIZE; +} + +static int ip_vs_genl_fill_service(struct sk_buff *skb, + struct ip_vs_service *svc) +{ + struct nlattr *nl_service; + struct ip_vs_flags flags = { .flags = svc->flags, + .mask = ~0 }; + + nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE); + if (!nl_service) + return -EMSGSIZE; + + NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, AF_INET); + + if (svc->fwmark) { + NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark); + } else { + NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol); + NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr); + NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port); + } + + NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name); + NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags); + NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ); + NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask); + + if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats)) + goto nla_put_failure; + + nla_nest_end(skb, nl_service); + + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nl_service); + return -EMSGSIZE; +} + +static int ip_vs_genl_dump_service(struct sk_buff *skb, + struct ip_vs_service *svc, + struct netlink_callback *cb) +{ + void *hdr; + + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + &ip_vs_genl_family, NLM_F_MULTI, + IPVS_CMD_NEW_SERVICE); + if (!hdr) + return -EMSGSIZE; + + if (ip_vs_genl_fill_service(skb, svc) < 0) + goto nla_put_failure; + + return genlmsg_end(skb, hdr); + +nla_put_failure: + genlmsg_cancel(skb, hdr); + return -EMSGSIZE; +} + +static int ip_vs_genl_dump_services(struct sk_buff *skb, + struct netlink_callback *cb) +{ + int idx = 0, i; + int start = cb->args[0]; + struct ip_vs_service *svc; + + mutex_lock(&__ip_vs_mutex); + for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { + list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) { + if (++idx <= start) + continue; + if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { + idx--; + goto nla_put_failure; + } + } + } + + for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { + list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) { + if (++idx <= start) + continue; + if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { + idx--; + goto nla_put_failure; + } + } + } + +nla_put_failure: + mutex_unlock(&__ip_vs_mutex); + cb->args[0] = idx; + + return skb->len; +} + +static int ip_vs_genl_parse_service(struct ip_vs_service_user *usvc, + struct nlattr *nla, int full_entry) +{ + struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1]; + struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr; + + /* Parse mandatory identifying service fields first */ + if (nla == NULL || + nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy)) + return -EINVAL; + + nla_af = attrs[IPVS_SVC_ATTR_AF]; + nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL]; + nla_addr = attrs[IPVS_SVC_ATTR_ADDR]; + nla_port = attrs[IPVS_SVC_ATTR_PORT]; + nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK]; + + if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr)))) + return -EINVAL; + + /* For now, only support IPv4 */ + if (nla_get_u16(nla_af) != AF_INET) + return -EAFNOSUPPORT; + + if (nla_fwmark) { + usvc->protocol = IPPROTO_TCP; + usvc->fwmark = nla_get_u32(nla_fwmark); + } else { + usvc->protocol = nla_get_u16(nla_protocol); + nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr)); + usvc->port = nla_get_u16(nla_port); + usvc->fwmark = 0; + } + + /* If a full entry was requested, check for the additional fields */ + if (full_entry) { + struct nlattr *nla_sched, *nla_flags, *nla_timeout, + *nla_netmask; + struct ip_vs_flags flags; + struct ip_vs_service *svc; + + nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME]; + nla_flags = attrs[IPVS_SVC_ATTR_FLAGS]; + nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT]; + nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK]; + + if (!(nla_sched && nla_flags && nla_timeout && nla_netmask)) + return -EINVAL; + + nla_memcpy(&flags, nla_flags, sizeof(flags)); + + /* prefill flags from service if it already exists */ + if (usvc->fwmark) + svc = __ip_vs_svc_fwm_get(usvc->fwmark); + else + svc = __ip_vs_service_get(usvc->protocol, usvc->addr, + usvc->port); + if (svc) { + usvc->flags = svc->flags; + ip_vs_service_put(svc); + } else + usvc->flags = 0; + + /* set new flags from userland */ + usvc->flags = (usvc->flags & ~flags.mask) | + (flags.flags & flags.mask); + + strlcpy(usvc->sched_name, nla_data(nla_sched), + sizeof(usvc->sched_name)); + usvc->timeout = nla_get_u32(nla_timeout); + usvc->netmask = nla_get_u32(nla_netmask); + } + + return 0; +} + +static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla) +{ + struct ip_vs_service_user usvc; + int ret; + + ret = ip_vs_genl_parse_service(&usvc, nla, 0); + if (ret) + return ERR_PTR(ret); + + if (usvc.fwmark) + return __ip_vs_svc_fwm_get(usvc.fwmark); + else + return __ip_vs_service_get(usvc.protocol, usvc.addr, + usvc.port); +} + +static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest) +{ + struct nlattr *nl_dest; + + nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST); + if (!nl_dest) + return -EMSGSIZE; + + NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr); + NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port); + + NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD, + atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK); + NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight)); + NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold); + NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold); + NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS, + atomic_read(&dest->activeconns)); + NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS, + atomic_read(&dest->inactconns)); + NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS, + atomic_read(&dest->persistconns)); + + if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats)) + goto nla_put_failure; + + nla_nest_end(skb, nl_dest); + + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nl_dest); + return -EMSGSIZE; +} + +static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest, + struct netlink_callback *cb) +{ + void *hdr; + + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + &ip_vs_genl_family, NLM_F_MULTI, + IPVS_CMD_NEW_DEST); + if (!hdr) + return -EMSGSIZE; + + if (ip_vs_genl_fill_dest(skb, dest) < 0) + goto nla_put_failure; + + return genlmsg_end(skb, hdr); + +nla_put_failure: + genlmsg_cancel(skb, hdr); + return -EMSGSIZE; +} + +static int ip_vs_genl_dump_dests(struct sk_buff *skb, + struct netlink_callback *cb) +{ + int idx = 0; + int start = cb->args[0]; + struct ip_vs_service *svc; + struct ip_vs_dest *dest; + struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1]; + + mutex_lock(&__ip_vs_mutex); + + /* Try to find the service for which to dump destinations */ + if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs, + IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy)) + goto out_err; + + svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]); + if (IS_ERR(svc) || svc == NULL) + goto out_err; + + /* Dump the destinations */ + list_for_each_entry(dest, &svc->destinations, n_list) { + if (++idx <= start) + continue; + if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) { + idx--; + goto nla_put_failure; + } + } + +nla_put_failure: + cb->args[0] = idx; + ip_vs_service_put(svc); + +out_err: + mutex_unlock(&__ip_vs_mutex); + + return skb->len; +} + +static int ip_vs_genl_parse_dest(struct ip_vs_dest_user *udest, + struct nlattr *nla, int full_entry) +{ + struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1]; + struct nlattr *nla_addr, *nla_port; + + /* Parse mandatory identifying destination fields first */ + if (nla == NULL || + nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy)) + return -EINVAL; + + nla_addr = attrs[IPVS_DEST_ATTR_ADDR]; + nla_port = attrs[IPVS_DEST_ATTR_PORT]; + + if (!(nla_addr && nla_port)) + return -EINVAL; + + nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr)); + udest->port = nla_get_u16(nla_port); + + /* If a full entry was requested, check for the additional fields */ + if (full_entry) { + struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh, + *nla_l_thresh; + + nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD]; + nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT]; + nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH]; + nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH]; + + if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh)) + return -EINVAL; + + udest->conn_flags = nla_get_u32(nla_fwd) + & IP_VS_CONN_F_FWD_MASK; + udest->weight = nla_get_u32(nla_weight); + udest->u_threshold = nla_get_u32(nla_u_thresh); + udest->l_threshold = nla_get_u32(nla_l_thresh); + } + + return 0; +} + +static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state, + const char *mcast_ifn, __be32 syncid) +{ + struct nlattr *nl_daemon; + + nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON); + if (!nl_daemon) + return -EMSGSIZE; + + NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state); + NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn); + NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid); + + nla_nest_end(skb, nl_daemon); + + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nl_daemon); + return -EMSGSIZE; +} + +static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state, + const char *mcast_ifn, __be32 syncid, + struct netlink_callback *cb) +{ + void *hdr; + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + &ip_vs_genl_family, NLM_F_MULTI, + IPVS_CMD_NEW_DAEMON); + if (!hdr) + return -EMSGSIZE; + + if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid)) + goto nla_put_failure; + + return genlmsg_end(skb, hdr); + +nla_put_failure: + genlmsg_cancel(skb, hdr); + return -EMSGSIZE; +} + +static int ip_vs_genl_dump_daemons(struct sk_buff *skb, + struct netlink_callback *cb) +{ + mutex_lock(&__ip_vs_mutex); + if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) { + if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER, + ip_vs_master_mcast_ifn, + ip_vs_master_syncid, cb) < 0) + goto nla_put_failure; + + cb->args[0] = 1; + } + + if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) { + if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP, + ip_vs_backup_mcast_ifn, + ip_vs_backup_syncid, cb) < 0) + goto nla_put_failure; + + cb->args[1] = 1; + } + +nla_put_failure: + mutex_unlock(&__ip_vs_mutex); + + return skb->len; +} + +static int ip_vs_genl_new_daemon(struct nlattr **attrs) +{ + if (!(attrs[IPVS_DAEMON_ATTR_STATE] && + attrs[IPVS_DAEMON_ATTR_MCAST_IFN] && + attrs[IPVS_DAEMON_ATTR_SYNC_ID])) + return -EINVAL; + + return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]), + nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]), + nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID])); +} + +static int ip_vs_genl_del_daemon(struct nlattr **attrs) +{ + if (!attrs[IPVS_DAEMON_ATTR_STATE]) + return -EINVAL; + + return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE])); +} + +static int ip_vs_genl_set_config(struct nlattr **attrs) +{ + struct ip_vs_timeout_user t; + + __ip_vs_get_timeouts(&t); + + if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]) + t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]); + + if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]) + t.tcp_fin_timeout = + nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]); + + if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]) + t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]); + + return ip_vs_set_timeout(&t); +} + +static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) +{ + struct ip_vs_service *svc = NULL; + struct ip_vs_service_user usvc; + struct ip_vs_dest_user udest; + int ret = 0, cmd; + int need_full_svc = 0, need_full_dest = 0; + + cmd = info->genlhdr->cmd; + + mutex_lock(&__ip_vs_mutex); + + if (cmd == IPVS_CMD_FLUSH) { + ret = ip_vs_flush(); + goto out; + } else if (cmd == IPVS_CMD_SET_CONFIG) { + ret = ip_vs_genl_set_config(info->attrs); + goto out; + } else if (cmd == IPVS_CMD_NEW_DAEMON || + cmd == IPVS_CMD_DEL_DAEMON) { + + struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1]; + + if (!info->attrs[IPVS_CMD_ATTR_DAEMON] || + nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX, + info->attrs[IPVS_CMD_ATTR_DAEMON], + ip_vs_daemon_policy)) { + ret = -EINVAL; + goto out; + } + + if (cmd == IPVS_CMD_NEW_DAEMON) + ret = ip_vs_genl_new_daemon(daemon_attrs); + else + ret = ip_vs_genl_del_daemon(daemon_attrs); + goto out; + } else if (cmd == IPVS_CMD_ZERO && + !info->attrs[IPVS_CMD_ATTR_SERVICE]) { + ret = ip_vs_zero_all(); + goto out; + } + + /* All following commands require a service argument, so check if we + * received a valid one. We need a full service specification when + * adding / editing a service. Only identifying members otherwise. */ + if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE) + need_full_svc = 1; + + ret = ip_vs_genl_parse_service(&usvc, + info->attrs[IPVS_CMD_ATTR_SERVICE], + need_full_svc); + if (ret) + goto out; + + /* Lookup the exact service by or fwmark */ + if (usvc.fwmark == 0) + svc = __ip_vs_service_get(usvc.protocol, usvc.addr, usvc.port); + else + svc = __ip_vs_svc_fwm_get(usvc.fwmark); + + /* Unless we're adding a new service, the service must already exist */ + if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) { + ret = -ESRCH; + goto out; + } + + /* Destination commands require a valid destination argument. For + * adding / editing a destination, we need a full destination + * specification. */ + if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST || + cmd == IPVS_CMD_DEL_DEST) { + if (cmd != IPVS_CMD_DEL_DEST) + need_full_dest = 1; + + ret = ip_vs_genl_parse_dest(&udest, + info->attrs[IPVS_CMD_ATTR_DEST], + need_full_dest); + if (ret) + goto out; + } + + switch (cmd) { + case IPVS_CMD_NEW_SERVICE: + if (svc == NULL) + ret = ip_vs_add_service(&usvc, &svc); + else + ret = -EEXIST; + break; + case IPVS_CMD_SET_SERVICE: + ret = ip_vs_edit_service(svc, &usvc); + break; + case IPVS_CMD_DEL_SERVICE: + ret = ip_vs_del_service(svc); + break; + case IPVS_CMD_NEW_DEST: + ret = ip_vs_add_dest(svc, &udest); + break; + case IPVS_CMD_SET_DEST: + ret = ip_vs_edit_dest(svc, &udest); + break; + case IPVS_CMD_DEL_DEST: + ret = ip_vs_del_dest(svc, &udest); + break; + case IPVS_CMD_ZERO: + ret = ip_vs_zero_service(svc); + break; + default: + ret = -EINVAL; + } + +out: + if (svc) + ip_vs_service_put(svc); + mutex_unlock(&__ip_vs_mutex); + + return ret; +} + +static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info) +{ + struct sk_buff *msg; + void *reply; + int ret, cmd, reply_cmd; + + cmd = info->genlhdr->cmd; + + if (cmd == IPVS_CMD_GET_SERVICE) + reply_cmd = IPVS_CMD_NEW_SERVICE; + else if (cmd == IPVS_CMD_GET_INFO) + reply_cmd = IPVS_CMD_SET_INFO; + else if (cmd == IPVS_CMD_GET_CONFIG) + reply_cmd = IPVS_CMD_SET_CONFIG; + else { + IP_VS_ERR("unknown Generic Netlink command\n"); + return -EINVAL; + } + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + mutex_lock(&__ip_vs_mutex); + + reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd); + if (reply == NULL) + goto nla_put_failure; + + switch (cmd) { + case IPVS_CMD_GET_SERVICE: + { + struct ip_vs_service *svc; + + svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]); + if (IS_ERR(svc)) { + ret = PTR_ERR(svc); + goto out_err; + } else if (svc) { + ret = ip_vs_genl_fill_service(msg, svc); + ip_vs_service_put(svc); + if (ret) + goto nla_put_failure; + } else { + ret = -ESRCH; + goto out_err; + } + + break; + } + + case IPVS_CMD_GET_CONFIG: + { + struct ip_vs_timeout_user t; + + __ip_vs_get_timeouts(&t); +#ifdef CONFIG_IP_VS_PROTO_TCP + NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout); + NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN, + t.tcp_fin_timeout); +#endif +#ifdef CONFIG_IP_VS_PROTO_UDP + NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout); +#endif + + break; + } + + case IPVS_CMD_GET_INFO: + NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE); + NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE, + IP_VS_CONN_TAB_SIZE); + break; + } + + genlmsg_end(msg, reply); + ret = genlmsg_unicast(msg, info->snd_pid); + goto out; + +nla_put_failure: + IP_VS_ERR("not enough space in Netlink message\n"); + ret = -EMSGSIZE; + +out_err: + nlmsg_free(msg); +out: + mutex_unlock(&__ip_vs_mutex); + + return ret; +} + + +static struct genl_ops ip_vs_genl_ops[] __read_mostly = { + { + .cmd = IPVS_CMD_NEW_SERVICE, + .flags = GENL_ADMIN_PERM, + .policy = ip_vs_cmd_policy, + .doit = ip_vs_genl_set_cmd, + }, + { + .cmd = IPVS_CMD_SET_SERVICE, + .flags = GENL_ADMIN_PERM, + .policy = ip_vs_cmd_policy, + .doit = ip_vs_genl_set_cmd, + }, + { + .cmd = IPVS_CMD_DEL_SERVICE, + .flags = GENL_ADMIN_PERM, + .policy = ip_vs_cmd_policy, + .doit = ip_vs_genl_set_cmd, + }, + { + .cmd = IPVS_CMD_GET_SERVICE, + .flags = GENL_ADMIN_PERM, + .doit = ip_vs_genl_get_cmd, + .dumpit = ip_vs_genl_dump_services, + .policy = ip_vs_cmd_policy, + }, + { + .cmd = IPVS_CMD_NEW_DEST, + .flags = GENL_ADMIN_PERM, + .policy = ip_vs_cmd_policy, + .doit = ip_vs_genl_set_cmd, + }, + { + .cmd = IPVS_CMD_SET_DEST, + .flags = GENL_ADMIN_PERM, + .policy = ip_vs_cmd_policy, + .doit = ip_vs_genl_set_cmd, + }, + { + .cmd = IPVS_CMD_DEL_DEST, + .flags = GENL_ADMIN_PERM, + .policy = ip_vs_cmd_policy, + .doit = ip_vs_genl_set_cmd, + }, + { + .cmd = IPVS_CMD_GET_DEST, + .flags = GENL_ADMIN_PERM, + .policy = ip_vs_cmd_policy, + .dumpit = ip_vs_genl_dump_dests, + }, + { + .cmd = IPVS_CMD_NEW_DAEMON, + .flags = GENL_ADMIN_PERM, + .policy = ip_vs_cmd_policy, + .doit = ip_vs_genl_set_cmd, + }, + { + .cmd = IPVS_CMD_DEL_DAEMON, + .flags = GENL_ADMIN_PERM, + .policy = ip_vs_cmd_policy, + .doit = ip_vs_genl_set_cmd, + }, + { + .cmd = IPVS_CMD_GET_DAEMON, + .flags = GENL_ADMIN_PERM, + .dumpit = ip_vs_genl_dump_daemons, + }, + { + .cmd = IPVS_CMD_SET_CONFIG, + .flags = GENL_ADMIN_PERM, + .policy = ip_vs_cmd_policy, + .doit = ip_vs_genl_set_cmd, + }, + { + .cmd = IPVS_CMD_GET_CONFIG, + .flags = GENL_ADMIN_PERM, + .doit = ip_vs_genl_get_cmd, + }, + { + .cmd = IPVS_CMD_GET_INFO, + .flags = GENL_ADMIN_PERM, + .doit = ip_vs_genl_get_cmd, + }, + { + .cmd = IPVS_CMD_ZERO, + .flags = GENL_ADMIN_PERM, + .policy = ip_vs_cmd_policy, + .doit = ip_vs_genl_set_cmd, + }, + { + .cmd = IPVS_CMD_FLUSH, + .flags = GENL_ADMIN_PERM, + .doit = ip_vs_genl_set_cmd, + }, +}; + +static int __init ip_vs_genl_register(void) +{ + int ret, i; + + ret = genl_register_family(&ip_vs_genl_family); + if (ret) + return ret; + + for (i = 0; i < ARRAY_SIZE(ip_vs_genl_ops); i++) { + ret = genl_register_ops(&ip_vs_genl_family, &ip_vs_genl_ops[i]); + if (ret) + goto err_out; + } + return 0; + +err_out: + genl_unregister_family(&ip_vs_genl_family); + return ret; +} + +static void ip_vs_genl_unregister(void) +{ + genl_unregister_family(&ip_vs_genl_family); +} + +/* End of Generic Netlink interface definitions */ + int __init ip_vs_control_init(void) { @@ -2334,6 +3201,13 @@ int __init ip_vs_control_init(void) return ret; } + ret = ip_vs_genl_register(); + if (ret) { + IP_VS_ERR("cannot register Generic Netlink interface.\n"); + nf_unregister_sockopt(&ip_vs_sockopts); + return ret; + } + proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops); proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops); @@ -2368,6 +3242,7 @@ void ip_vs_control_cleanup(void) unregister_sysctl_table(sysctl_header); proc_net_remove(&init_net, "ip_vs_stats"); proc_net_remove(&init_net, "ip_vs"); + ip_vs_genl_unregister(); nf_unregister_sockopt(&ip_vs_sockopts); LeaveFunction(2); } -- cgit v0.10.2 From 82dfb6f32219d8e6cf6b979a520cb2b11d977d4e Mon Sep 17 00:00:00 2001 From: Sven Wegener Date: Mon, 11 Aug 2008 19:36:06 +0000 Subject: ipvs: Only call init_service, update_service and done_service for schedulers if defined There are schedulers that only schedule based on data available in the service or destination structures and they don't need any persistent storage or initialization routine. These schedulers currently provide dummy functions for the init_service, update_service and/or done_service functions. For the init_service and done_service cases we already have code that only calls these functions, if the scheduler provides them. Do the same for the update_service case and remove the dummy functions from all schedulers. Signed-off-by: Sven Wegener Signed-off-by: Simon Horman diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c index d1dbd8b..ede101e 100644 --- a/net/ipv4/ipvs/ip_vs_ctl.c +++ b/net/ipv4/ipvs/ip_vs_ctl.c @@ -869,7 +869,8 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) svc->num_dests++; /* call the update_service function of its scheduler */ - svc->scheduler->update_service(svc); + if (svc->scheduler->update_service) + svc->scheduler->update_service(svc); write_unlock_bh(&__ip_vs_svc_lock); return 0; @@ -899,7 +900,8 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) svc->num_dests++; /* call the update_service function of its scheduler */ - svc->scheduler->update_service(svc); + if (svc->scheduler->update_service) + svc->scheduler->update_service(svc); write_unlock_bh(&__ip_vs_svc_lock); @@ -949,7 +951,8 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1); /* call the update_service, because server weight may be changed */ - svc->scheduler->update_service(svc); + if (svc->scheduler->update_service) + svc->scheduler->update_service(svc); write_unlock_bh(&__ip_vs_svc_lock); @@ -1012,12 +1015,12 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc, */ list_del(&dest->n_list); svc->num_dests--; - if (svcupd) { - /* - * Call the update_service function of its scheduler - */ - svc->scheduler->update_service(svc); - } + + /* + * Call the update_service function of its scheduler + */ + if (svcupd && svc->scheduler->update_service) + svc->scheduler->update_service(svc); } diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c index 7a6a319..4a14d06 100644 --- a/net/ipv4/ipvs/ip_vs_lblc.c +++ b/net/ipv4/ipvs/ip_vs_lblc.c @@ -388,12 +388,6 @@ static int ip_vs_lblc_done_svc(struct ip_vs_service *svc) } -static int ip_vs_lblc_update_svc(struct ip_vs_service *svc) -{ - return 0; -} - - static inline struct ip_vs_dest * __ip_vs_wlc_schedule(struct ip_vs_service *svc, struct iphdr *iph) { @@ -542,7 +536,6 @@ static struct ip_vs_scheduler ip_vs_lblc_scheduler = .n_list = LIST_HEAD_INIT(ip_vs_lblc_scheduler.n_list), .init_service = ip_vs_lblc_init_svc, .done_service = ip_vs_lblc_done_svc, - .update_service = ip_vs_lblc_update_svc, .schedule = ip_vs_lblc_schedule, }; diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c index c234e73..46b8703 100644 --- a/net/ipv4/ipvs/ip_vs_lblcr.c +++ b/net/ipv4/ipvs/ip_vs_lblcr.c @@ -572,12 +572,6 @@ static int ip_vs_lblcr_done_svc(struct ip_vs_service *svc) } -static int ip_vs_lblcr_update_svc(struct ip_vs_service *svc) -{ - return 0; -} - - static inline struct ip_vs_dest * __ip_vs_wlc_schedule(struct ip_vs_service *svc, struct iphdr *iph) { @@ -731,7 +725,6 @@ static struct ip_vs_scheduler ip_vs_lblcr_scheduler = .n_list = LIST_HEAD_INIT(ip_vs_lblcr_scheduler.n_list), .init_service = ip_vs_lblcr_init_svc, .done_service = ip_vs_lblcr_done_svc, - .update_service = ip_vs_lblcr_update_svc, .schedule = ip_vs_lblcr_schedule, }; diff --git a/net/ipv4/ipvs/ip_vs_lc.c b/net/ipv4/ipvs/ip_vs_lc.c index ebcdbf7..2c3de1b 100644 --- a/net/ipv4/ipvs/ip_vs_lc.c +++ b/net/ipv4/ipvs/ip_vs_lc.c @@ -20,24 +20,6 @@ #include -static int ip_vs_lc_init_svc(struct ip_vs_service *svc) -{ - return 0; -} - - -static int ip_vs_lc_done_svc(struct ip_vs_service *svc) -{ - return 0; -} - - -static int ip_vs_lc_update_svc(struct ip_vs_service *svc) -{ - return 0; -} - - static inline unsigned int ip_vs_lc_dest_overhead(struct ip_vs_dest *dest) { @@ -99,9 +81,6 @@ static struct ip_vs_scheduler ip_vs_lc_scheduler = { .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_lc_scheduler.n_list), - .init_service = ip_vs_lc_init_svc, - .done_service = ip_vs_lc_done_svc, - .update_service = ip_vs_lc_update_svc, .schedule = ip_vs_lc_schedule, }; diff --git a/net/ipv4/ipvs/ip_vs_nq.c b/net/ipv4/ipvs/ip_vs_nq.c index 92f3a67..5330d5a 100644 --- a/net/ipv4/ipvs/ip_vs_nq.c +++ b/net/ipv4/ipvs/ip_vs_nq.c @@ -37,27 +37,6 @@ #include -static int -ip_vs_nq_init_svc(struct ip_vs_service *svc) -{ - return 0; -} - - -static int -ip_vs_nq_done_svc(struct ip_vs_service *svc) -{ - return 0; -} - - -static int -ip_vs_nq_update_svc(struct ip_vs_service *svc) -{ - return 0; -} - - static inline unsigned int ip_vs_nq_dest_overhead(struct ip_vs_dest *dest) { @@ -137,9 +116,6 @@ static struct ip_vs_scheduler ip_vs_nq_scheduler = .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_nq_scheduler.n_list), - .init_service = ip_vs_nq_init_svc, - .done_service = ip_vs_nq_done_svc, - .update_service = ip_vs_nq_update_svc, .schedule = ip_vs_nq_schedule, }; diff --git a/net/ipv4/ipvs/ip_vs_rr.c b/net/ipv4/ipvs/ip_vs_rr.c index 358110d..f749291 100644 --- a/net/ipv4/ipvs/ip_vs_rr.c +++ b/net/ipv4/ipvs/ip_vs_rr.c @@ -32,12 +32,6 @@ static int ip_vs_rr_init_svc(struct ip_vs_service *svc) } -static int ip_vs_rr_done_svc(struct ip_vs_service *svc) -{ - return 0; -} - - static int ip_vs_rr_update_svc(struct ip_vs_service *svc) { svc->sched_data = &svc->destinations; @@ -96,7 +90,6 @@ static struct ip_vs_scheduler ip_vs_rr_scheduler = { .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_rr_scheduler.n_list), .init_service = ip_vs_rr_init_svc, - .done_service = ip_vs_rr_done_svc, .update_service = ip_vs_rr_update_svc, .schedule = ip_vs_rr_schedule, }; diff --git a/net/ipv4/ipvs/ip_vs_sed.c b/net/ipv4/ipvs/ip_vs_sed.c index 77663d8..53f73be 100644 --- a/net/ipv4/ipvs/ip_vs_sed.c +++ b/net/ipv4/ipvs/ip_vs_sed.c @@ -41,27 +41,6 @@ #include -static int -ip_vs_sed_init_svc(struct ip_vs_service *svc) -{ - return 0; -} - - -static int -ip_vs_sed_done_svc(struct ip_vs_service *svc) -{ - return 0; -} - - -static int -ip_vs_sed_update_svc(struct ip_vs_service *svc) -{ - return 0; -} - - static inline unsigned int ip_vs_sed_dest_overhead(struct ip_vs_dest *dest) { @@ -139,9 +118,6 @@ static struct ip_vs_scheduler ip_vs_sed_scheduler = .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_sed_scheduler.n_list), - .init_service = ip_vs_sed_init_svc, - .done_service = ip_vs_sed_done_svc, - .update_service = ip_vs_sed_update_svc, .schedule = ip_vs_sed_schedule, }; diff --git a/net/ipv4/ipvs/ip_vs_wlc.c b/net/ipv4/ipvs/ip_vs_wlc.c index 9b0ef86..df7ad8d 100644 --- a/net/ipv4/ipvs/ip_vs_wlc.c +++ b/net/ipv4/ipvs/ip_vs_wlc.c @@ -25,27 +25,6 @@ #include -static int -ip_vs_wlc_init_svc(struct ip_vs_service *svc) -{ - return 0; -} - - -static int -ip_vs_wlc_done_svc(struct ip_vs_service *svc) -{ - return 0; -} - - -static int -ip_vs_wlc_update_svc(struct ip_vs_service *svc) -{ - return 0; -} - - static inline unsigned int ip_vs_wlc_dest_overhead(struct ip_vs_dest *dest) { @@ -127,9 +106,6 @@ static struct ip_vs_scheduler ip_vs_wlc_scheduler = .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_wlc_scheduler.n_list), - .init_service = ip_vs_wlc_init_svc, - .done_service = ip_vs_wlc_done_svc, - .update_service = ip_vs_wlc_update_svc, .schedule = ip_vs_wlc_schedule, }; -- cgit v0.10.2 From a919cf4b6b499416b6e2247dbc79196c4325f2e6 Mon Sep 17 00:00:00 2001 From: Sven Wegener Date: Thu, 14 Aug 2008 00:47:16 +0200 Subject: ipvs: Create init functions for estimator code Commit 8ab19ea36c5c5340ff598e4d15fc084eb65671dc ("ipvs: Fix possible deadlock in estimator code") fixed a deadlock condition, but that condition can only happen during unload of IPVS, because during normal operation there is at least our global stats structure in the estimator list. The mod_timer() and del_timer_sync() calls are actually initialization and cleanup code in disguise. Let's make it explicit and move them to their own init and cleanup function. Signed-off-by: Sven Wegener Signed-off-by: Simon Horman diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 7312c3d..a25ad24 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -683,6 +683,8 @@ extern void ip_vs_sync_conn(struct ip_vs_conn *cp); /* * IPVS rate estimator prototypes (from ip_vs_est.c) */ +extern int ip_vs_estimator_init(void); +extern void ip_vs_estimator_cleanup(void); extern void ip_vs_new_estimator(struct ip_vs_stats *stats); extern void ip_vs_kill_estimator(struct ip_vs_stats *stats); extern void ip_vs_zero_estimator(struct ip_vs_stats *stats); diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c index a7879ea..9fbf0a6 100644 --- a/net/ipv4/ipvs/ip_vs_core.c +++ b/net/ipv4/ipvs/ip_vs_core.c @@ -1070,10 +1070,12 @@ static int __init ip_vs_init(void) { int ret; + ip_vs_estimator_init(); + ret = ip_vs_control_init(); if (ret < 0) { IP_VS_ERR("can't setup control.\n"); - goto cleanup_nothing; + goto cleanup_estimator; } ip_vs_protocol_init(); @@ -1106,7 +1108,8 @@ static int __init ip_vs_init(void) cleanup_protocol: ip_vs_protocol_cleanup(); ip_vs_control_cleanup(); - cleanup_nothing: + cleanup_estimator: + ip_vs_estimator_cleanup(); return ret; } @@ -1117,6 +1120,7 @@ static void __exit ip_vs_cleanup(void) ip_vs_app_cleanup(); ip_vs_protocol_cleanup(); ip_vs_control_cleanup(); + ip_vs_estimator_cleanup(); IP_VS_INFO("ipvs unloaded.\n"); } diff --git a/net/ipv4/ipvs/ip_vs_est.c b/net/ipv4/ipvs/ip_vs_est.c index 5a20f93..4fb620e 100644 --- a/net/ipv4/ipvs/ip_vs_est.c +++ b/net/ipv4/ipvs/ip_vs_est.c @@ -124,8 +124,6 @@ void ip_vs_new_estimator(struct ip_vs_stats *stats) est->outbps = stats->outbps<<5; spin_lock_bh(&est_lock); - if (list_empty(&est_list)) - mod_timer(&est_timer, jiffies + 2 * HZ); list_add(&est->list, &est_list); spin_unlock_bh(&est_lock); } @@ -136,11 +134,6 @@ void ip_vs_kill_estimator(struct ip_vs_stats *stats) spin_lock_bh(&est_lock); list_del(&est->list); - while (list_empty(&est_list) && try_to_del_timer_sync(&est_timer) < 0) { - spin_unlock_bh(&est_lock); - cpu_relax(); - spin_lock_bh(&est_lock); - } spin_unlock_bh(&est_lock); } @@ -160,3 +153,14 @@ void ip_vs_zero_estimator(struct ip_vs_stats *stats) est->inbps = 0; est->outbps = 0; } + +int __init ip_vs_estimator_init(void) +{ + mod_timer(&est_timer, jiffies + 2 * HZ); + return 0; +} + +void ip_vs_estimator_cleanup(void) +{ + del_timer_sync(&est_timer); +} -- cgit v0.10.2 From 4a031b0e6acd8a8c23725ceb5db6a0aa5c4e231f Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Fri, 15 Aug 2008 09:26:15 +1000 Subject: ipvs: rename __ip_vs_wlc_schedule in lblc and lblcr schedulers For the sake of clarity, rename __ip_vs_wlc_schedule() in lblc.c to __ip_vs_lblc_schedule() and the version in lblcr.c to __ip_vs_lblc_schedule(). I guess the original name stuck from a copy and paste. Cc: Sven Wegener Signed-off-by: Simon Horman diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c index 4a14d06..b9b334c 100644 --- a/net/ipv4/ipvs/ip_vs_lblc.c +++ b/net/ipv4/ipvs/ip_vs_lblc.c @@ -389,7 +389,7 @@ static int ip_vs_lblc_done_svc(struct ip_vs_service *svc) static inline struct ip_vs_dest * -__ip_vs_wlc_schedule(struct ip_vs_service *svc, struct iphdr *iph) +__ip_vs_lblc_schedule(struct ip_vs_service *svc, struct iphdr *iph) { struct ip_vs_dest *dest, *least; int loh, doh; @@ -488,7 +488,7 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) tbl = (struct ip_vs_lblc_table *)svc->sched_data; en = ip_vs_lblc_get(tbl, iph->daddr); if (en == NULL) { - dest = __ip_vs_wlc_schedule(svc, iph); + dest = __ip_vs_lblc_schedule(svc, iph); if (dest == NULL) { IP_VS_DBG(1, "no destination available\n"); return NULL; @@ -503,7 +503,7 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) if (!(dest->flags & IP_VS_DEST_F_AVAILABLE) || atomic_read(&dest->weight) <= 0 || is_overloaded(dest, svc)) { - dest = __ip_vs_wlc_schedule(svc, iph); + dest = __ip_vs_lblc_schedule(svc, iph); if (dest == NULL) { IP_VS_DBG(1, "no destination available\n"); return NULL; diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c index 46b8703..f1c8450 100644 --- a/net/ipv4/ipvs/ip_vs_lblcr.c +++ b/net/ipv4/ipvs/ip_vs_lblcr.c @@ -573,7 +573,7 @@ static int ip_vs_lblcr_done_svc(struct ip_vs_service *svc) static inline struct ip_vs_dest * -__ip_vs_wlc_schedule(struct ip_vs_service *svc, struct iphdr *iph) +__ip_vs_lblcr_schedule(struct ip_vs_service *svc, struct iphdr *iph) { struct ip_vs_dest *dest, *least; int loh, doh; @@ -673,7 +673,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) tbl = (struct ip_vs_lblcr_table *)svc->sched_data; en = ip_vs_lblcr_get(tbl, iph->daddr); if (en == NULL) { - dest = __ip_vs_wlc_schedule(svc, iph); + dest = __ip_vs_lblcr_schedule(svc, iph); if (dest == NULL) { IP_VS_DBG(1, "no destination available\n"); return NULL; @@ -687,7 +687,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) } else { dest = ip_vs_dest_set_min(&en->set); if (!dest || is_overloaded(dest, svc)) { - dest = __ip_vs_wlc_schedule(svc, iph); + dest = __ip_vs_lblcr_schedule(svc, iph); if (dest == NULL) { IP_VS_DBG(1, "no destination available\n"); return NULL; -- cgit v0.10.2 From b76d69ed721e8365739c3bd5dd7891efbea88494 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Thu, 14 Aug 2008 17:25:37 -0700 Subject: x86_64: uml: fix rename header guard In unistd_64.h, the guard macro _ASM_X86_64_UNISTD_H_ is renamed to ASM_X86__UNISTD_64_H. This change should be applied to arch/um/sys-x86_64/syscall_table.c. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar diff --git a/arch/um/sys-x86_64/syscall_table.c b/arch/um/sys-x86_64/syscall_table.c index c128eb8..32f5fbe 100644 --- a/arch/um/sys-x86_64/syscall_table.c +++ b/arch/um/sys-x86_64/syscall_table.c @@ -41,12 +41,12 @@ #define stub_rt_sigreturn sys_rt_sigreturn #define __SYSCALL(nr, sym) extern asmlinkage void sym(void) ; -#undef _ASM_X86_64_UNISTD_H_ +#undef ASM_X86__UNISTD_64_H #include #undef __SYSCALL #define __SYSCALL(nr, sym) [ nr ] = sym, -#undef _ASM_X86_64_UNISTD_H_ +#undef ASM_X86__UNISTD_64_H typedef void (*sys_call_ptr_t)(void); -- cgit v0.10.2 From ed4e5ec177d20504c51aebb93db12d57716cde9c Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 15 Aug 2008 13:51:20 +0200 Subject: x86: apic - use SET_APIC_DEST_FIELD instead of hardcoded shift Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 69a876b..77c5e5e 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -150,7 +150,7 @@ u32 safe_xapic_wait_icr_idle(void) void xapic_icr_write(u32 low, u32 id) { - apic_write(APIC_ICR2, id << 24); + apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id)); apic_write(APIC_ICR, low); } -- cgit v0.10.2 From 36fef0949c14445d231a68663e8a01860f7caca3 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 15 Aug 2008 13:51:20 +0200 Subject: x86: apic - unify disable_apic_timer Get rid of local_apic_timer_disabled and use disable_apic_timer instead. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 12b1548..6af20dd 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -63,7 +63,7 @@ int disable_apic; /* Local APIC timer verification ok */ static int local_apic_timer_verify_ok; /* Disable local APIC timer from the kernel commandline or via dmi quirk */ -static int local_apic_timer_disabled; +static int disable_apic_timer __cpuinitdata; /* Local APIC timer works in C2 */ int local_apic_timer_c2_ok; EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); @@ -574,7 +574,7 @@ void __init setup_boot_APIC_clock(void) * timer as a dummy clock event source on SMP systems, so the * broadcast mechanism is used. On UP systems simply ignore it. */ - if (local_apic_timer_disabled) { + if (disable_apic_timer) { /* No broadcast on UP ! */ if (num_possible_cpus() > 1) { lapic_clockevent.mult = 1; @@ -1699,12 +1699,19 @@ static int __init parse_nolapic(char *arg) } early_param("nolapic", parse_nolapic); -static int __init parse_disable_lapic_timer(char *arg) +static int __init parse_disable_apic_timer(char *arg) { - local_apic_timer_disabled = 1; + disable_apic_timer = 1; return 0; } -early_param("nolapic_timer", parse_disable_lapic_timer); +early_param("noapictimer", parse_disable_apic_timer); + +static int __init parse_nolapic_timer(char *arg) +{ + disable_apic_timer = 1; + return 0; +} +early_param("nolapic_timer", parse_nolapic_timer); static int __init parse_lapic_timer_c2_ok(char *arg) { diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 77c5e5e..1e925be 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -45,6 +45,7 @@ #include #include +/* Disable local APIC timer from the kernel commandline or via dmi quirk */ static int disable_apic_timer __cpuinitdata; static int apic_calibrate_pmtmr __initdata; int disable_apic; @@ -1583,14 +1584,19 @@ static int __init parse_lapic_timer_c2_ok(char *arg) } early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); -static __init int setup_noapictimer(char *str) +static int __init parse_disable_apic_timer(char *arg) { - if (str[0] != ' ' && str[0] != 0) - return 0; disable_apic_timer = 1; - return 1; + return 0; +} +early_param("noapictimer", parse_disable_apic_timer); + +static int __init parse_nolapic_timer(char *arg) +{ + disable_apic_timer = 1; + return 0; } -__setup("noapictimer", setup_noapictimer); +early_param("nolapic_timer", parse_nolapic_timer); static __init int setup_apicpmtimer(char *s) { -- cgit v0.10.2 From f07f4f9046121ac803bc2f0ded3d77b7c2ab481b Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 15 Aug 2008 13:51:21 +0200 Subject: x86: apic - unify __setup_APIC_LVTT To be able to unify this function we RE-introduce APIC_DIVISOR for 64bit mode. This snipped was eliminated in some time ago in a sake of clenup but now we need it again since it allow up to get rid of #ifdef(s). And lapic_is_integrated call is added in apic_64.c but since we always have APIC integrated on 64bit cpu compiler will ignore this call. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 1e925be..ac399d0 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -244,6 +244,9 @@ int lapic_get_maxlvt(void) return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2; } +/* Clock divisor is set to 1 */ +#define APIC_DIVISOR 1 + /* * This function sets up the local APIC timer, with a timeout of * 'clocks' APIC bus clock. During calibration we actually call @@ -262,6 +265,9 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) lvtt_value = LOCAL_TIMER_VECTOR; if (!oneshot) lvtt_value |= APIC_LVT_TIMER_PERIODIC; + if (!lapic_is_integrated()) + lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV); + if (!irqen) lvtt_value |= APIC_LVT_MASKED; @@ -276,7 +282,7 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) | APIC_TDR_DIV_16); if (!oneshot) - apic_write(APIC_TMICT, clocks); + apic_write(APIC_TMICT, clocks / APIC_DIVISOR); } /* @@ -446,7 +452,7 @@ static int __init calibrate_APIC_clock(void) lapic_clockevent.min_delta_ns = clockevent_delta2ns(0xF, &lapic_clockevent); - calibration_result = result / HZ; + calibration_result = (result * APIC_DIVISOR) / HZ; /* * Do a sanity check on the APIC calibration result -- cgit v0.10.2 From 9ce122c6e55c44ae9a4c4c777579b87d83e7f898 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 15 Aug 2008 13:51:21 +0200 Subject: x86: apic - do not clear APIC twice in lapic_shutdown There is no need to clear APIC twice since disable_local_APIC will clear it anyway. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 6af20dd..a151d66 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -830,10 +830,11 @@ void lapic_shutdown(void) return; local_irq_save(flags); - clear_local_APIC(); if (enabled_via_apicbase) disable_local_APIC(); + else + clear_local_APIC(); local_irq_restore(flags); } -- cgit v0.10.2 From 64e474d168e3cf31e44890b4947644d361f84e43 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 15 Aug 2008 13:51:22 +0200 Subject: x86: apic - get rid of local_apic_timer_verify_ok We are able to use clock_event_device as it's done in 64bit apic code so lets get rid of local_apic_timer_verify_ok variable. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index a151d66..7783c11 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -60,8 +60,6 @@ unsigned long mp_lapic_addr; static int force_enable_local_apic; int disable_apic; -/* Local APIC timer verification ok */ -static int local_apic_timer_verify_ok; /* Disable local APIC timer from the kernel commandline or via dmi quirk */ static int disable_apic_timer __cpuinitdata; /* Local APIC timer works in C2 */ @@ -301,7 +299,7 @@ static void lapic_timer_setup(enum clock_event_mode mode, unsigned int v; /* Lapic used for broadcast ? */ - if (!local_apic_timer_verify_ok) + if (evt->features & CLOCK_EVT_FEAT_DUMMY) return; local_irq_save(flags); @@ -514,7 +512,7 @@ static int __init calibrate_APIC_clock(void) return -1; } - local_apic_timer_verify_ok = 1; + levt->features &= ~CLOCK_EVT_FEAT_DUMMY; /* We trust the pm timer based calibration */ if (!pm_referenced) { @@ -548,11 +546,11 @@ static int __init calibrate_APIC_clock(void) if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2) apic_printk(APIC_VERBOSE, "... jiffies result ok\n"); else - local_apic_timer_verify_ok = 0; + levt->features |= CLOCK_EVT_FEAT_DUMMY; } else local_irq_enable(); - if (!local_apic_timer_verify_ok) { + if (levt->features & CLOCK_EVT_FEAT_DUMMY) { printk(KERN_WARNING "APIC timer disabled due to verification failure.\n"); return -1; -- cgit v0.10.2 From c93baa1ae51cdba25a5f5ad37b2348e700e75daf Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 15 Aug 2008 13:51:22 +0200 Subject: x86: apic - unify verify_local_APIC Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 7783c11..7ef7c78 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -878,6 +878,12 @@ int __init verify_local_APIC(void) */ reg0 = apic_read(APIC_ID); apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); + apic_write(APIC_ID, reg0 ^ APIC_ID_MASK); + reg1 = apic_read(APIC_ID); + apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1); + apic_write(APIC_ID, reg0); + if (reg1 != (reg0 ^ APIC_ID_MASK)) + return 0; /* * The next two are just to see if we have sane values. -- cgit v0.10.2 From 296cb9511dcc3895fda84d0cd5b411bd926e4bb3 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 15 Aug 2008 13:51:23 +0200 Subject: x86: apic - unify sync_Arb_IDs Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 7ef7c78..d074889 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -915,8 +915,7 @@ void __init sync_Arb_IDs(void) apic_wait_icr_idle(); apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); - apic_write(APIC_ICR, - APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | APIC_DM_INIT); + apic_write(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | APIC_DM_INIT); } /* diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index ac399d0..41aff34 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -742,8 +742,11 @@ int __init verify_local_APIC(void) */ void __init sync_Arb_IDs(void) { - /* Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 */ - if (modern_apic()) + /* + * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 And not + * needed on AMD. + */ + if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD) return; /* @@ -752,8 +755,7 @@ void __init sync_Arb_IDs(void) apic_wait_icr_idle(); apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); - apic_write(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG - | APIC_DM_INIT); + apic_write(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | APIC_DM_INIT); } /* -- cgit v0.10.2 From 293a17ebc944c958e24e6ffbd1d5a49abdbf489e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 12 Aug 2008 17:25:03 -0700 Subject: rcu: prevent console flood when one CPU sees another AWOL via RCU One small change needed to keep from flooding the console when one CPU notices that another is AWOL. Unless I am missing something subtle. Otherwise the cleanups look good! Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c index 56b8712..dab2676 100644 --- a/kernel/rcuclassic.c +++ b/kernel/rcuclassic.c @@ -308,6 +308,7 @@ static void print_other_cpu_stall(struct rcu_ctrlblk *rcp) spin_unlock(&rcp->lock); return; } + rcp->gp_check = get_seconds() + 30; spin_unlock(&rcp->lock); /* OK, time to rat on our buddy... */ -- cgit v0.10.2 From 1f7b94cd3d564901f9e04a8bc5832ae7bfd690a0 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 5 Aug 2008 09:21:44 -0700 Subject: rcu: classic RCU locking and memory-barrier cleanups This patch simplifies the locking and memory-barrier usage in the Classic RCU grace-period-detection mechanism, incorporating Lai Jiangshan's feedback from the earlier version (http://lkml.org/lkml/2008/8/1/400 and http://lkml.org/lkml/2008/8/3/43). Passed 10 hours of rcutorture concurrent with CPUs being put online and taken offline on a 128-hardware-thread Power machine. My apologies to whoever in the Eastern Hemisphere was planning to use this machine over the Western Hemisphere night, but it was sitting idle and... So this is ready for tip/core/rcu. This patch is in preparation for moving to a hierarchical algorithm to allow the very large SMP machines -- requested by some people at OLS, and there seem to have been a few recent patches in the 4096-CPU direction as well. The general idea is to move to a much more conservative concurrency design, then apply a hierarchy to reduce contention on the global lock by a few orders of magnitude (larger machines would see greater reductions). The reason for taking a conservative approach is that this code isn't on any fast path. Prototype in progress. This patch is against the linux-tip git tree (tip/core/rcu). If you wish to test this against 2.6.26, use the following set of patches: http://www.rdrop.com/users/paulmck/patches/2.6.26-ljsimp-1.patch http://www.rdrop.com/users/paulmck/patches/2.6.26-ljsimpfix-3.patch The first patch combines commits 5127bed588a2f8f3a1f732de2a8a190b7df5dce3 and 3cac97cbb14aed00d83eb33d4613b0fe3aaea863 from Lai Jiangshan , and the second patch contains my changes. Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c index dab2676..5de1266 100644 --- a/kernel/rcuclassic.c +++ b/kernel/rcuclassic.c @@ -87,6 +87,7 @@ static void force_quiescent_state(struct rcu_data *rdp, int cpu; cpumask_t cpumask; set_need_resched(); + spin_lock(&rcp->lock); if (unlikely(!rcp->signaled)) { rcp->signaled = 1; /* @@ -112,6 +113,7 @@ static void force_quiescent_state(struct rcu_data *rdp, for_each_cpu_mask_nr(cpu, cpumask) smp_send_reschedule(cpu); } + spin_unlock(&rcp->lock); } #else static inline void force_quiescent_state(struct rcu_data *rdp, @@ -125,7 +127,9 @@ static void __call_rcu(struct rcu_head *head, struct rcu_ctrlblk *rcp, struct rcu_data *rdp) { long batch; - smp_mb(); /* reads the most recently updated value of rcu->cur. */ + + head->next = NULL; + smp_mb(); /* Read of rcu->cur must happen after any change by caller. */ /* * Determine the batch number of this callback. @@ -175,7 +179,6 @@ void call_rcu(struct rcu_head *head, unsigned long flags; head->func = func; - head->next = NULL; local_irq_save(flags); __call_rcu(head, &rcu_ctrlblk, &__get_cpu_var(rcu_data)); local_irq_restore(flags); @@ -204,7 +207,6 @@ void call_rcu_bh(struct rcu_head *head, unsigned long flags; head->func = func; - head->next = NULL; local_irq_save(flags); __call_rcu(head, &rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data)); local_irq_restore(flags); @@ -467,17 +469,17 @@ static void rcu_move_batch(struct rcu_data *this_rdp, struct rcu_head *list, static void __rcu_offline_cpu(struct rcu_data *this_rdp, struct rcu_ctrlblk *rcp, struct rcu_data *rdp) { - /* if the cpu going offline owns the grace period + /* + * if the cpu going offline owns the grace period * we can block indefinitely waiting for it, so flush * it here */ spin_lock_bh(&rcp->lock); if (rcp->cur != rcp->completed) cpu_quiet(rdp->cpu, rcp); - spin_unlock_bh(&rcp->lock); - /* spin_lock implies smp_mb() */ rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail, rcp->cur + 1); rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail[2], rcp->cur + 1); + spin_unlock_bh(&rcp->lock); local_irq_disable(); this_rdp->qlen += rdp->qlen; @@ -511,16 +513,19 @@ static void rcu_offline_cpu(int cpu) static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp, struct rcu_data *rdp) { + long completed_snap; + if (rdp->nxtlist) { local_irq_disable(); + completed_snap = ACCESS_ONCE(rcp->completed); /* * move the other grace-period-completed entries to * [rdp->nxtlist, *rdp->nxttail[0]) temporarily */ - if (!rcu_batch_before(rcp->completed, rdp->batch)) + if (!rcu_batch_before(completed_snap, rdp->batch)) rdp->nxttail[0] = rdp->nxttail[1] = rdp->nxttail[2]; - else if (!rcu_batch_before(rcp->completed, rdp->batch - 1)) + else if (!rcu_batch_before(completed_snap, rdp->batch - 1)) rdp->nxttail[0] = rdp->nxttail[1]; /* @@ -561,8 +566,24 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp, static void rcu_process_callbacks(struct softirq_action *unused) { + /* + * Memory references from any prior RCU read-side critical sections + * executed by the interrupted code must be see before any RCU + * grace-period manupulations below. + */ + + smp_mb(); /* See above block comment. */ + __rcu_process_callbacks(&rcu_ctrlblk, &__get_cpu_var(rcu_data)); __rcu_process_callbacks(&rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data)); + + /* + * Memory references from any later RCU read-side critical sections + * executed by the interrupted code must be see after any RCU + * grace-period manupulations above. + */ + + smp_mb(); /* See above block comment. */ } static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp) @@ -571,13 +592,15 @@ static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp) check_cpu_stall(rcp, rdp); if (rdp->nxtlist) { + long completed_snap = ACCESS_ONCE(rcp->completed); + /* * This cpu has pending rcu entries and the grace period * for them has completed. */ - if (!rcu_batch_before(rcp->completed, rdp->batch)) + if (!rcu_batch_before(completed_snap, rdp->batch)) return 1; - if (!rcu_batch_before(rcp->completed, rdp->batch - 1) && + if (!rcu_batch_before(completed_snap, rdp->batch - 1) && rdp->nxttail[0] != rdp->nxttail[1]) return 1; if (rdp->nxttail[0] != &rdp->nxtlist) @@ -628,6 +651,12 @@ int rcu_needs_cpu(int cpu) return !!rdp->nxtlist || !!rdp_bh->nxtlist || rcu_pending(cpu); } +/* + * Top-level function driving RCU grace-period detection, normally + * invoked from the scheduler-clock interrupt. This function simply + * increments counters that are read only from softirq by this same + * CPU, so there are no memory barriers required. + */ void rcu_check_callbacks(int cpu, int user) { if (user || @@ -671,6 +700,7 @@ void rcu_check_callbacks(int cpu, int user) static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp, struct rcu_data *rdp) { + spin_lock(&rcp->lock); memset(rdp, 0, sizeof(*rdp)); rdp->nxttail[0] = rdp->nxttail[1] = rdp->nxttail[2] = &rdp->nxtlist; rdp->donetail = &rdp->donelist; @@ -678,6 +708,7 @@ static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp, rdp->qs_pending = 0; rdp->cpu = cpu; rdp->blimit = blimit; + spin_unlock(&rcp->lock); } static void __cpuinit rcu_online_cpu(int cpu) -- cgit v0.10.2 From 516cbf3730c49739629d66313b20bdc50c98aa2c Mon Sep 17 00:00:00 2001 From: Tim Bird Date: Tue, 12 Aug 2008 12:52:36 -0700 Subject: x86, bootup: add built-in kernel command line for x86 (v2) Allow x86 to support a built-in kernel command line. The built-in command line can override the one provided by the boot loader, for those cases where the boot loader is broken or it is difficult to change the command line in the the boot loader. H. Peter Anvin wrote: > Ingo Molnar wrote: >> Best would be to make it really apparent in the code that nothing >> changes if this config option is not set. Preferably there should be >> no extra code at all in that case. >> > > I would like to see this: [...Nested ifdefs...] OK. This version changes absolutely nothing if CONFIG_CMDLINE_BOOL is not set (the default). Also, no space is appended even when CONFIG_CMDLINE_BOOL is set, but the builtin string is empty. This is less sloppy all the way around, IMHO. Note that I use the same option names as on other arches for this feature. [ mingo@elte.hu: build fix ] Signed-off-by: Tim Bird Cc: Matt Mackall Signed-off-by: Ingo Molnar diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ac2fb06..fbcb79b 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1392,6 +1392,51 @@ config COMPAT_VDSO If unsure, say Y. +config CMDLINE_BOOL + bool "Built-in kernel command line" + default n + help + Allow for specifying boot arguments to the kernel at + build time. On some systems (e.g. embedded ones), it is + necessary or convenient to provide some or all of the + kernel boot arguments with the kernel itself (that is, + to not rely on the boot loader to provide them.) + + To compile command line arguments into the kernel, + set this option to 'Y', then fill in the + the boot arguments in CONFIG_CMDLINE. + + Systems with fully functional boot loaders (i.e. non-embedded) + should leave this option set to 'N'. + +config CMDLINE + string "Built-in kernel command string" + depends on CMDLINE_BOOL + default "" + help + Enter arguments here that should be compiled into the kernel + image and used at boot time. If the boot loader provides a + command line at boot time, it is appended to this string to + form the full kernel command line, when the system boots. + + However, you can use the CONFIG_CMDLINE_OVERRIDE option to + change this behavior. + + In most cases, the command line (whether built-in or provided + by the boot loader) should specify the device for the root + file system. + +config CMDLINE_OVERRIDE + bool "Built-in command line overrides boot loader arguments" + default n + depends on CMDLINE_BOOL + help + Set this option to 'Y' to have the kernel ignore the boot loader + command line, and use ONLY the built-in command line. + + This is used to work around broken boot loaders. This should + be set to 'N' under normal conditions. + endmenu config ARCH_ENABLE_MEMORY_HOTPLUG diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 68b48e3..2f31cdd 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -223,6 +223,9 @@ unsigned long saved_video_mode; #define RAMDISK_LOAD_FLAG 0x4000 static char __initdata command_line[COMMAND_LINE_SIZE]; +#ifdef CONFIG_CMDLINE_BOOL +static char __initdata builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE; +#endif #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) struct edd edd; @@ -673,6 +676,19 @@ void __init setup_arch(char **cmdline_p) bss_resource.start = virt_to_phys(&__bss_start); bss_resource.end = virt_to_phys(&__bss_stop)-1; +#ifdef CONFIG_CMDLINE_BOOL +#ifdef CONFIG_CMDLINE_OVERRIDE + strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); +#else + if (builtin_cmdline[0]) { + /* append boot loader cmdline to builtin */ + strlcat(builtin_cmdline, " ", COMMAND_LINE_SIZE); + strlcat(builtin_cmdline, boot_command_line, COMMAND_LINE_SIZE); + strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); + } +#endif +#endif + strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); *cmdline_p = command_line; -- cgit v0.10.2 From 1ac2f7d55b7ee1613c90631e87fea22ec06781e5 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 4 Aug 2008 14:51:24 +0800 Subject: introduce two APIs for page attribute Introduce two APIs for page attribute. flushing tlb/cache in every page attribute is expensive. AGP gart usually will do a lot of operations to change a page to uc, new APIs can reduce flush. Signed-off-by: Shaohua Li Cc: airlied@linux.ie Cc: Andrew Morton Cc: Arjan van de Ven Signed-off-by: Ingo Molnar diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 65c6e46..2c5c18c 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -752,12 +752,12 @@ static inline int cache_attr(pgprot_t attr) (_PAGE_PAT | _PAGE_PAT_LARGE | _PAGE_PWT | _PAGE_PCD); } -static int change_page_attr_set_clr(unsigned long addr, int numpages, +static int do_change_page_attr_set_clr(unsigned long addr, int numpages, pgprot_t mask_set, pgprot_t mask_clr, - int force_split) + int force_split, int *tlb_flush) { struct cpa_data cpa; - int ret, cache, checkalias; + int ret, checkalias; /* * Check, if we are requested to change a not supported @@ -792,9 +792,22 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages, /* * Check whether we really changed something: */ - if (!cpa.flushtlb) - goto out; + *tlb_flush = cpa.flushtlb; + cpa_fill_pool(NULL); + + return ret; +} + +static int change_page_attr_set_clr(unsigned long addr, int numpages, + pgprot_t mask_set, pgprot_t mask_clr, + int force_split) +{ + int cache, flush_cache = 0, ret; + ret = do_change_page_attr_set_clr(addr, numpages, mask_set, mask_clr, + force_split, &flush_cache); + if (!flush_cache) + goto out; /* * No need to flush, when we did not set any of the caching * attributes: @@ -811,10 +824,7 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages, cpa_flush_range(addr, numpages, cache); else cpa_flush_all(cache); - out: - cpa_fill_pool(NULL); - return ret; } @@ -852,6 +862,30 @@ int set_memory_uc(unsigned long addr, int numpages) } EXPORT_SYMBOL(set_memory_uc); +int set_memory_uc_noflush(unsigned long addr, int numpages) +{ + int flush; + /* + * for now UC MINUS. see comments in ioremap_nocache() + */ + if (reserve_memtype(addr, addr + numpages * PAGE_SIZE, + _PAGE_CACHE_UC_MINUS, NULL)) + return -EINVAL; + /* + * for now UC MINUS. see comments in ioremap_nocache() + */ + return do_change_page_attr_set_clr(addr, numpages, + __pgprot(_PAGE_CACHE_UC_MINUS), + __pgprot(0), 0, &flush); +} +EXPORT_SYMBOL(set_memory_uc_noflush); + +void set_memory_flush_all(void) +{ + cpa_flush_all(1); +} +EXPORT_SYMBOL(set_memory_flush_all); + int _set_memory_wc(unsigned long addr, int numpages) { return change_page_attr_set(addr, numpages, @@ -926,6 +960,14 @@ int set_pages_uc(struct page *page, int numpages) } EXPORT_SYMBOL(set_pages_uc); +int set_pages_uc_noflush(struct page *page, int numpages) +{ + unsigned long addr = (unsigned long)page_address(page); + + return set_memory_uc_noflush(addr, numpages); +} +EXPORT_SYMBOL(set_pages_uc_noflush); + int set_pages_wb(struct page *page, int numpages) { unsigned long addr = (unsigned long)page_address(page); diff --git a/include/asm-x86/cacheflush.h b/include/asm-x86/cacheflush.h index f4c0ab5..57bac7b 100644 --- a/include/asm-x86/cacheflush.h +++ b/include/asm-x86/cacheflush.h @@ -57,6 +57,8 @@ int _set_memory_uc(unsigned long addr, int numpages); int _set_memory_wc(unsigned long addr, int numpages); int _set_memory_wb(unsigned long addr, int numpages); int set_memory_uc(unsigned long addr, int numpages); +int set_memory_uc_noflush(unsigned long addr, int numpages); +void set_memory_flush_all(void); int set_memory_wc(unsigned long addr, int numpages); int set_memory_wb(unsigned long addr, int numpages); int set_memory_x(unsigned long addr, int numpages); @@ -87,6 +89,7 @@ int set_memory_4k(unsigned long addr, int numpages); */ int set_pages_uc(struct page *page, int numpages); +int set_pages_uc_noflush(struct page *page, int numpages); int set_pages_wb(struct page *page, int numpages); int set_pages_x(struct page *page, int numpages); int set_pages_nx(struct page *page, int numpages); -- cgit v0.10.2 From 466ae837424dcc538b1af2a0eaf53be32edcdbe7 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 4 Aug 2008 14:51:30 +0800 Subject: reduce tlb/cache flush times of agpgart memory allocation To reduce tlb/cache flush, makes agp memory allocation do one flush after all pages in a region are changed to uc. All agp drivers except agp-sgi uses agp_generic_alloc_page() for .agp_alloc_page, so the patch should work for them. agp-sgi is only for ia64, so not a problem too. Signed-off-by: Shaohua Li Cc: airlied@linux.ie Cc: Andrew Morton Cc: Arjan van de Ven Signed-off-by: Ingo Molnar diff --git a/drivers/char/agp/agp.h b/drivers/char/agp/agp.h index 81e14be..395168f 100644 --- a/drivers/char/agp/agp.h +++ b/drivers/char/agp/agp.h @@ -30,6 +30,10 @@ #define _AGP_BACKEND_PRIV_H 1 #include /* for flush_agp_cache() */ +#ifndef map_page_into_agp_noflush +#define map_page_into_agp_noflush(page) map_page_into_agp(page) +#define map_page_into_agp_global_flush() +#endif #define PFX "agpgart: " diff --git a/drivers/char/agp/generic.c b/drivers/char/agp/generic.c index eaa1a35..bf239b8 100644 --- a/drivers/char/agp/generic.c +++ b/drivers/char/agp/generic.c @@ -274,6 +274,7 @@ struct agp_memory *agp_allocate_memory(struct agp_bridge_data *bridge, new->memory[i] = virt_to_gart(addr); new->page_count++; } + map_page_into_agp_global_flush(); new->bridge = bridge; return new; @@ -1186,7 +1187,8 @@ void *agp_generic_alloc_page(struct agp_bridge_data *bridge) if (page == NULL) return NULL; - map_page_into_agp(page); + /* agp_allocate_memory will do flush */ + map_page_into_agp_noflush(page); get_page(page); atomic_inc(&agp_bridge->current_memory_agp); diff --git a/include/asm-x86/agp.h b/include/asm-x86/agp.h index e4004a9..181b9e9 100644 --- a/include/asm-x86/agp.h +++ b/include/asm-x86/agp.h @@ -15,6 +15,9 @@ #define map_page_into_agp(page) set_pages_uc(page, 1) #define unmap_page_from_agp(page) set_pages_wb(page, 1) +#define map_page_into_agp_noflush(page) set_pages_uc_noflush(page, 1) +#define map_page_into_agp_global_flush() set_memory_flush_all() + /* * Could use CLFLUSH here if the cpu supports it. But then it would * need to be called for each cacheline of the whole page so it may -- cgit v0.10.2 From 2bd455dbfebfd632a8dcf1d3d1612737986fde0a Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 4 Aug 2008 11:26:38 +0800 Subject: x86: remove nesting CONFIG_HOTPLUG_CPU prefill_possible_map() is defined inside CONFIG_HOTPLUG_CPU, so the nesting CONFIG_HOTPLUG_CPU is just redundant. Signed-off-by: Li Zefan Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 3325127..d5e19c3 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1271,16 +1271,13 @@ __init void prefill_possible_map(void) if (!num_processors) num_processors = 1; -#ifdef CONFIG_HOTPLUG_CPU if (additional_cpus == -1) { if (disabled_cpus > 0) additional_cpus = disabled_cpus; else additional_cpus = 0; } -#else - additional_cpus = 0; -#endif + possible = num_processors + additional_cpus; if (possible > NR_CPUS) possible = NR_CPUS; -- cgit v0.10.2 From 020878ac427aa053414602cef975c2b5a2e33bf8 Mon Sep 17 00:00:00 2001 From: Paolo Ciarrocchi Date: Sat, 2 Aug 2008 21:23:36 +0200 Subject: x86: coding style fixes to arch/x86/boot/compressed/misc.c Before: total: 4 errors, 6 warnings, 439 lines checked After: total: 1 errors, 5 warnings, 441 lines checked Before -#include +#include paolo@paolo-desktop:~/linux.trees.git$ md5sum /tmp/misc.o.* 8b2394e1fe519a9542e9a7e3e7b69c39 /tmp/misc.o.after 8b2394e1fe519a9542e9a7e3e7b69c39 /tmp/misc.o.before After -#include +#include paolo@paolo-desktop:~/linux.trees.git$ md5sum /tmp/misc.o.* 59a2d264284be5e72b5af4f3a8ccfb47 /tmp/misc.o.after 8b2394e1fe519a9542e9a7e3e7b69c39 /tmp/misc.o.before Signed-off-by: Paolo Ciarrocchi Signed-off-by: Ingo Molnar diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 9fea737..2a3f77e5 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include #include #include @@ -251,7 +251,7 @@ static void __putstr(int error, const char *s) y--; } } else { - vidmem [(x + cols * y) * 2] = c; + vidmem[(x + cols * y) * 2] = c; if (++x >= cols) { x = 0; if (++y >= lines) { @@ -277,7 +277,8 @@ static void *memset(void *s, int c, unsigned n) int i; char *ss = s; - for (i = 0; i < n; i++) ss[i] = c; + for (i = 0; i < n; i++) + ss[i] = c; return s; } @@ -287,7 +288,8 @@ static void *memcpy(void *dest, const void *src, unsigned n) const char *s = src; char *d = dest; - for (i = 0; i < n; i++) d[i] = s[i]; + for (i = 0; i < n; i++) + d[i] = s[i]; return dest; } -- cgit v0.10.2 From 2070dae10f50ec244f58292436ace9a3f9dc1d71 Mon Sep 17 00:00:00 2001 From: Paolo Ciarrocchi Date: Sat, 2 Aug 2008 21:24:06 +0200 Subject: x86: coding style fixes to arch/x86/kernel/bios_uv.c paolo@paolo-desktop:~/linux.trees.git$ md5sum /tmp/bios_uv.o.* 9afe794594831166704744184e192ed8 /tmp/bios_uv.o.after 9afe794594831166704744184e192ed8 /tmp/bios_uv.o.before Signed-off-by: Paolo Ciarrocchi Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c index c639bd5..fdd585f 100644 --- a/arch/x86/kernel/bios_uv.c +++ b/arch/x86/kernel/bios_uv.c @@ -25,11 +25,11 @@ x86_bios_strerror(long status) { const char *str; switch (status) { - case 0: str = "Call completed without error"; break; - case -1: str = "Not implemented"; break; - case -2: str = "Invalid argument"; break; - case -3: str = "Call completed with error"; break; - default: str = "Unknown BIOS status code"; break; + case 0: str = "Call completed without error"; break; + case -1: str = "Not implemented"; break; + case -2: str = "Invalid argument"; break; + case -3: str = "Call completed with error"; break; + default: str = "Unknown BIOS status code"; break; } return str; } -- cgit v0.10.2 From 209b580fd8c3a42b69550c98de434671d41a4ebb Mon Sep 17 00:00:00 2001 From: Paolo Ciarrocchi Date: Sat, 2 Aug 2008 21:24:45 +0200 Subject: x86: coding style fixes to arch/x86/lib/strstr_32.c Before: total: 3 errors, 0 warnings, 31 lines checked After: total: 0 errors, 0 warnings, 31 lines checked paolo@paolo-desktop:~/linux.trees.git$ md5sum /tmp/strstr_32.o.* c96006ec3387862e5bacb139207a3098 /tmp/strstr_32.o.after c96006ec3387862e5bacb139207a3098 /tmp/strstr_32.o.before Signed-off-by: Paolo Ciarrocchi Signed-off-by: Ingo Molnar diff --git a/arch/x86/lib/strstr_32.c b/arch/x86/lib/strstr_32.c index 42e8a50..8e2d55f 100644 --- a/arch/x86/lib/strstr_32.c +++ b/arch/x86/lib/strstr_32.c @@ -23,9 +23,9 @@ __asm__ __volatile__( "jne 1b\n\t" "xorl %%eax,%%eax\n\t" "2:" - :"=a" (__res), "=&c" (d0), "=&S" (d1) - :"0" (0), "1" (0xffffffff), "2" (cs), "g" (ct) - :"dx", "di"); + : "=a" (__res), "=&c" (d0), "=&S" (d1) + : "0" (0), "1" (0xffffffff), "2" (cs), "g" (ct) + : "dx", "di"); return __res; } -- cgit v0.10.2 From 3492cdf0176bde5e35223a1388d59676bc67c145 Mon Sep 17 00:00:00 2001 From: Paolo Ciarrocchi Date: Sat, 2 Aug 2008 21:25:13 +0200 Subject: x86: coding style fixes to arch/x86/lib/string_32.c Before: total: 21 errors, 0 warnings, 237 lines checked After: total: 0 errors, 0 warnings, 237 lines checked paolo@paolo-desktop:~/linux.trees.git$ md5sum /tmp/string_32.o.* c55d059ef1612b32a8bb2771a72ae0d5 /tmp/string_32.o.after c55d059ef1612b32a8bb2771a72ae0d5 /tmp/string_32.o.before Signed-off-by: Paolo Ciarrocchi Signed-off-by: Ingo Molnar diff --git a/arch/x86/lib/string_32.c b/arch/x86/lib/string_32.c index 94972e7..82004d2 100644 --- a/arch/x86/lib/string_32.c +++ b/arch/x86/lib/string_32.c @@ -22,7 +22,7 @@ char *strcpy(char *dest, const char *src) "testb %%al,%%al\n\t" "jne 1b" : "=&S" (d0), "=&D" (d1), "=&a" (d2) - :"0" (src), "1" (dest) : "memory"); + : "0" (src), "1" (dest) : "memory"); return dest; } EXPORT_SYMBOL(strcpy); @@ -42,7 +42,7 @@ char *strncpy(char *dest, const char *src, size_t count) "stosb\n" "2:" : "=&S" (d0), "=&D" (d1), "=&c" (d2), "=&a" (d3) - :"0" (src), "1" (dest), "2" (count) : "memory"); + : "0" (src), "1" (dest), "2" (count) : "memory"); return dest; } EXPORT_SYMBOL(strncpy); @@ -60,7 +60,7 @@ char *strcat(char *dest, const char *src) "testb %%al,%%al\n\t" "jne 1b" : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3) - : "0" (src), "1" (dest), "2" (0), "3" (0xffffffffu): "memory"); + : "0" (src), "1" (dest), "2" (0), "3" (0xffffffffu) : "memory"); return dest; } EXPORT_SYMBOL(strcat); @@ -105,9 +105,9 @@ int strcmp(const char *cs, const char *ct) "2:\tsbbl %%eax,%%eax\n\t" "orb $1,%%al\n" "3:" - :"=a" (res), "=&S" (d0), "=&D" (d1) - :"1" (cs), "2" (ct) - :"memory"); + : "=a" (res), "=&S" (d0), "=&D" (d1) + : "1" (cs), "2" (ct) + : "memory"); return res; } EXPORT_SYMBOL(strcmp); @@ -130,9 +130,9 @@ int strncmp(const char *cs, const char *ct, size_t count) "3:\tsbbl %%eax,%%eax\n\t" "orb $1,%%al\n" "4:" - :"=a" (res), "=&S" (d0), "=&D" (d1), "=&c" (d2) - :"1" (cs), "2" (ct), "3" (count) - :"memory"); + : "=a" (res), "=&S" (d0), "=&D" (d1), "=&c" (d2) + : "1" (cs), "2" (ct), "3" (count) + : "memory"); return res; } EXPORT_SYMBOL(strncmp); @@ -152,9 +152,9 @@ char *strchr(const char *s, int c) "movl $1,%1\n" "2:\tmovl %1,%0\n\t" "decl %0" - :"=a" (res), "=&S" (d0) - :"1" (s), "0" (c) - :"memory"); + : "=a" (res), "=&S" (d0) + : "1" (s), "0" (c) + : "memory"); return res; } EXPORT_SYMBOL(strchr); @@ -169,9 +169,9 @@ size_t strlen(const char *s) "scasb\n\t" "notl %0\n\t" "decl %0" - :"=c" (res), "=&D" (d0) - :"1" (s), "a" (0), "0" (0xffffffffu) - :"memory"); + : "=c" (res), "=&D" (d0) + : "1" (s), "a" (0), "0" (0xffffffffu) + : "memory"); return res; } EXPORT_SYMBOL(strlen); @@ -189,9 +189,9 @@ void *memchr(const void *cs, int c, size_t count) "je 1f\n\t" "movl $1,%0\n" "1:\tdecl %0" - :"=D" (res), "=&c" (d0) - :"a" (c), "0" (cs), "1" (count) - :"memory"); + : "=D" (res), "=&c" (d0) + : "a" (c), "0" (cs), "1" (count) + : "memory"); return res; } EXPORT_SYMBOL(memchr); @@ -228,9 +228,9 @@ size_t strnlen(const char *s, size_t count) "cmpl $-1,%1\n\t" "jne 1b\n" "3:\tsubl %2,%0" - :"=a" (res), "=&d" (d0) - :"c" (s), "1" (count) - :"memory"); + : "=a" (res), "=&d" (d0) + : "c" (s), "1" (count) + : "memory"); return res; } EXPORT_SYMBOL(strnlen); -- cgit v0.10.2 From d9336a9b47d57db835453968efbd0d5cedfe0260 Mon Sep 17 00:00:00 2001 From: Paolo Ciarrocchi Date: Sat, 2 Aug 2008 21:25:43 +0200 Subject: x86: coding style fixes to arch/x86/kernel/paravirt_patch_32.c Before: total: 3 errors, 1 warnings, 49 lines checked After: total: 2 errors, 1 warnings, 49 lines checked paolo@paolo-desktop:~/linux.trees.git$ md5sum /tmp/paravirt_patch_32.o.* a78eea4264723e18c49dcfbe0ee0aae7 /tmp/paravirt_patch_32.o.after a78eea4264723e18c49dcfbe0ee0aae7 /tmp/paravirt_patch_32.o.before Signed-off-by: Paolo Ciarrocchi Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c index 5826221..9fe644f 100644 --- a/arch/x86/kernel/paravirt_patch_32.c +++ b/arch/x86/kernel/paravirt_patch_32.c @@ -23,7 +23,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, start = start_##ops##_##x; \ end = end_##ops##_##x; \ goto patch_site - switch(type) { + switch (type) { PATCH_SITE(pv_irq_ops, irq_disable); PATCH_SITE(pv_irq_ops, irq_enable); PATCH_SITE(pv_irq_ops, restore_fl); -- cgit v0.10.2 From ff9cf2ce7afe76435d66c898cc9dacaa68e79d41 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 1 Aug 2008 14:10:02 -0700 Subject: rcu: fixes to include/linux/rcupreempt.h Hello! Compared tip/core/rcu to my latest patchset, and found the following issues: o the memory barrier in rcu_exit_nohz() somehow got out of place (it is correct in mainline as of 2.6.26-rc7). o There is a duplicate declaration of rcu_dyntick_sched. The attached patch fixes these. Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar diff --git a/include/linux/rcupreempt.h b/include/linux/rcupreempt.h index 0967f03..addb5e2 100644 --- a/include/linux/rcupreempt.h +++ b/include/linux/rcupreempt.h @@ -111,7 +111,6 @@ extern struct rcupreempt_trace *rcupreempt_trace_cpu(int cpu); struct softirq_action; #ifdef CONFIG_NO_HZ -DECLARE_PER_CPU(struct rcu_dyntick_sched, rcu_dyntick_sched); static inline void rcu_enter_nohz(void) { @@ -126,8 +125,8 @@ static inline void rcu_exit_nohz(void) { static DEFINE_RATELIMIT_STATE(rs, 10 * HZ, 1); - smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ __get_cpu_var(rcu_dyntick_sched).dynticks++; + smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ WARN_ON_RATELIMIT(!(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1), &rs); } -- cgit v0.10.2 From 34d7c2b38d124219b7034356716e3455c439acd3 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 1 Aug 2008 14:11:05 -0700 Subject: rcu: remove list_for_each_rcu() All of the in-tree uses of list_for_each_rcu() have been converted to list_for_each_entry_rcu(), so list_for_each_rcu() can now be removed. Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt index cf5562c..6e25340 100644 --- a/Documentation/RCU/checklist.txt +++ b/Documentation/RCU/checklist.txt @@ -210,7 +210,7 @@ over a rather long period of time, but improvements are always welcome! number of updates per grace period. 9. All RCU list-traversal primitives, which include - rcu_dereference(), list_for_each_rcu(), list_for_each_entry_rcu(), + rcu_dereference(), list_for_each_entry_rcu(), list_for_each_continue_rcu(), and list_for_each_safe_rcu(), must be either within an RCU read-side critical section or must be protected by appropriate update-side locks. RCU diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt index e04d643..9617082 100644 --- a/Documentation/RCU/whatisRCU.txt +++ b/Documentation/RCU/whatisRCU.txt @@ -786,8 +786,6 @@ RCU pointer/list traversal: list_for_each_entry_rcu hlist_for_each_entry_rcu - list_for_each_rcu (to be deprecated in favor of - list_for_each_entry_rcu) list_for_each_continue_rcu (to be deprecated in favor of new list_for_each_entry_continue_rcu) diff --git a/include/linux/rculist.h b/include/linux/rculist.h index eb4443c..e649bd3 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -198,20 +198,6 @@ static inline void list_splice_init_rcu(struct list_head *list, at->prev = last; } -/** - * list_for_each_rcu - iterate over an rcu-protected list - * @pos: the &struct list_head to use as a loop cursor. - * @head: the head for your list. - * - * This list-traversal primitive may safely run concurrently with - * the _rcu list-mutation primitives such as list_add_rcu() - * as long as the traversal is guarded by rcu_read_lock(). - */ -#define list_for_each_rcu(pos, head) \ - for (pos = rcu_dereference((head)->next); \ - prefetch(pos->next), pos != (head); \ - pos = rcu_dereference(pos->next)) - #define __list_for_each_rcu(pos, head) \ for (pos = rcu_dereference((head)->next); \ pos != (head); \ -- cgit v0.10.2 From 07dd20e0324f4d3e33bde1944d4f7771a09c498c Mon Sep 17 00:00:00 2001 From: Richard Kennedy Date: Fri, 1 Aug 2008 13:18:04 +0100 Subject: sched: reorder signal_struct to remove 8 bytes on 64 bit builds reorder structure to remove 8 bytes of padding on 64 bit builds Signed-off-by: Richard Kennedy Signed-off-by: Ingo Molnar diff --git a/include/linux/sched.h b/include/linux/sched.h index cfb0d87..5a8058e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -451,8 +451,8 @@ struct signal_struct { * - everyone except group_exit_task is stopped during signal delivery * of fatal signals, group_exit_task processes the signal. */ - struct task_struct *group_exit_task; int notify_count; + struct task_struct *group_exit_task; /* thread group stop support, overloads group_exit_code too */ int group_stop_count; -- cgit v0.10.2 From bee367ed066e26c14263d808136fba8eec3bd70a Mon Sep 17 00:00:00 2001 From: Richard Kennedy Date: Fri, 1 Aug 2008 13:24:08 +0100 Subject: sched: reorder struct sched_rt_entity to remove padding on 64 bit builds remove 8 bytes of padding on 64 bit builds (also removes 8 bytes from task_struct) Signed-off-by: Richard Kennedy Signed-off-by: Ingo Molnar diff --git a/include/linux/sched.h b/include/linux/sched.h index 5a8058e..08a87b5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1010,8 +1010,8 @@ struct sched_entity { struct sched_rt_entity { struct list_head run_list; - unsigned int time_slice; unsigned long timeout; + unsigned int time_slice; int nr_cpus_allowed; struct sched_rt_entity *back; -- cgit v0.10.2 From 3fb669dd6ec11e14819c0114a0e68a9ddcec65e1 Mon Sep 17 00:00:00 2001 From: Richard Kennedy Date: Fri, 1 Aug 2008 13:36:28 +0100 Subject: reorder struct prop_local_single to remove padding on 64 bit builds reorder structure to remove 8 bytes of padding on 64 bit builds (also removes 8 bytes from task_struct) Signed-off-by: Richard Kennedy Cc: peterz@infradead.org Signed-off-by: Ingo Molnar diff --git a/include/linux/proportions.h b/include/linux/proportions.h index 5afc1b2..cf793bb 100644 --- a/include/linux/proportions.h +++ b/include/linux/proportions.h @@ -104,8 +104,8 @@ struct prop_local_single { * snapshot of the last seen global state * and a lock protecting this state */ - int shift; unsigned long period; + int shift; spinlock_t lock; /* protect the snapshot state */ }; -- cgit v0.10.2 From d33dcb9e7d272cc3171dcc3a21049902185ab03d Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Fri, 1 Aug 2008 12:46:45 +0200 Subject: x86: Microcode patch loader style corrections Style corrections to main microcode module. Signed-off-by: Peter Oruba Cc: Peter Oruba Cc: Tigran Aivazian Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c index 28dba1a..39961bb 100644 --- a/arch/x86/kernel/microcode.c +++ b/arch/x86/kernel/microcode.c @@ -55,8 +55,8 @@ * in a single CPU package. * 1.10 28 Feb 2002 Asit K Mallick and * Tigran Aivazian , - * Serialize updates as required on HT processors due to speculative - * nature of implementation. + * Serialize updates as required on HT processors due to + * speculative nature of implementation. * 1.11 22 Mar 2002 Tigran Aivazian * Fix the panic when writing zero-length microcode chunk. * 1.12 29 Sep 2003 Nitin Kamble , @@ -71,7 +71,7 @@ * Thanks to Stuart Swales for pointing out this bug. */ -//#define DEBUG /* pr_debug */ +/*#define DEBUG pr_debug */ #include #include #include @@ -116,7 +116,7 @@ EXPORT_SYMBOL_GPL(user_buffer); unsigned int user_buffer_size; /* it's size */ EXPORT_SYMBOL_GPL(user_buffer_size); -static int do_microcode_update (void) +static int do_microcode_update(void) { long cursor = 0; int error = 0; @@ -158,18 +158,20 @@ out: return error; } -static int microcode_open (struct inode *unused1, struct file *unused2) +static int microcode_open(struct inode *unused1, struct file *unused2) { cycle_kernel_lock(); return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; } -static ssize_t microcode_write (struct file *file, const char __user *buf, size_t len, loff_t *ppos) +static ssize_t microcode_write(struct file *file, const char __user *buf, + size_t len, loff_t *ppos) { ssize_t ret; if ((len >> PAGE_SHIFT) > num_physpages) { - printk(KERN_ERR "microcode: too much data (max %ld pages)\n", num_physpages); + printk(KERN_ERR "microcode: too much data (max %ld pages)\n", + num_physpages); return -EINVAL; } @@ -201,7 +203,7 @@ static struct miscdevice microcode_dev = { .fops = µcode_fops, }; -static int __init microcode_dev_init (void) +static int __init microcode_dev_init(void) { int error; @@ -216,7 +218,7 @@ static int __init microcode_dev_init (void) return 0; } -static void microcode_dev_exit (void) +static void microcode_dev_exit(void) { misc_deregister(µcode_dev); } @@ -224,7 +226,7 @@ static void microcode_dev_exit (void) MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); #else #define microcode_dev_init() 0 -#define microcode_dev_exit() do { } while(0) +#define microcode_dev_exit() do { } while (0) #endif /* fake device for request_firmware */ @@ -451,7 +453,7 @@ int microcode_init(void *opaque, struct module *module) } EXPORT_SYMBOL_GPL(microcode_init); -void __exit microcode_exit (void) +void __exit microcode_exit(void) { microcode_dev_exit(); -- cgit v0.10.2 From 636a31781684d0f49208aa163f1a5a3a74210eb4 Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Fri, 1 Aug 2008 12:46:46 +0200 Subject: x86: Fixed NULL function pointer dereference in AMD microcode patch loader. Dereference took place in code part responsible for manual installation of microcode patches through /dev/cpu/microcode. Signed-off-by: Peter Oruba Cc: Peter Oruba Cc: Tigran Aivazian Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c index 39961bb..ad136ad 100644 --- a/arch/x86/kernel/microcode.c +++ b/arch/x86/kernel/microcode.c @@ -127,7 +127,8 @@ static int do_microcode_update(void) old = current->cpus_allowed; while ((cursor = microcode_ops->get_next_ucode(&new_mc, cursor)) > 0) { - error = microcode_ops->microcode_sanity_check(new_mc); + if (microcode_ops->microcode_sanity_check != NULL) + error = microcode_ops->microcode_sanity_check(new_mc); if (error) goto out; /* -- cgit v0.10.2 From 5843d9a4d0ba89719916c8f07fc9c57b7126be6d Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 1 Aug 2008 03:15:21 +0200 Subject: x86, pat: avoid highmem cache attribute aliasing Highmem code can leave ptes and tlb entries around for a given page even after kunmap, and after it has been freed. >From what I can gather, the PAT code may change the cache attributes of arbitrary physical addresses (ie. including highmem pages), which would result in aliases in the case that it operates on one of these lazy tlb highmem pages. Flushing kmaps should solve the problem. I've also just added code for conditional flushing if we haven't got any dangling highmem aliases -- this should help performance if we change page attributes frequently or systems that aren't using much highmem pages (eg. if < 4G RAM). Should be turned into 2 patches, but just for RFC... Signed-off-by: Ingo Molnar diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 2c5c18c..4adb336 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -777,6 +777,9 @@ static int do_change_page_attr_set_clr(unsigned long addr, int numpages, WARN_ON_ONCE(1); } + /* Must avoid aliasing mappings in the highmem code */ + kmap_flush_unused(); + cpa.vaddr = addr; cpa.numpages = numpages; cpa.mask_set = mask_set; diff --git a/mm/highmem.c b/mm/highmem.c index e16e152..b36b83b 100644 --- a/mm/highmem.c +++ b/mm/highmem.c @@ -70,6 +70,7 @@ static DECLARE_WAIT_QUEUE_HEAD(pkmap_map_wait); static void flush_all_zero_pkmaps(void) { int i; + int need_flush = 0; flush_cache_kmaps(); @@ -101,8 +102,10 @@ static void flush_all_zero_pkmaps(void) &pkmap_page_table[i]); set_page_address(page, NULL); + need_flush = 1; } - flush_tlb_kernel_range(PKMAP_ADDR(0), PKMAP_ADDR(LAST_PKMAP)); + if (need_flush) + flush_tlb_kernel_range(PKMAP_ADDR(0), PKMAP_ADDR(LAST_PKMAP)); } /** -- cgit v0.10.2 From c9c3dddd8f9a05b25d4ce53e8e80cc0ea1759d18 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 1 Aug 2008 03:51:38 +0400 Subject: x86_64: remove empty lines from stack traces/oopses Signed-off-by: Alexey Dobriyan Cc: ak@suse.de Cc: akpm@osdl.org Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index 3f18d73..8b5b3b8 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -339,9 +339,8 @@ static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, unsigned long bp, char *log_lvl) { - printk("\nCall Trace:\n"); + printk("Call Trace:\n"); dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); - printk("\n"); } void show_trace(struct task_struct *task, struct pt_regs *regs, @@ -386,6 +385,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, printk(" %016lx", *stack++); touch_nmi_watchdog(); } + printk("\n"); show_trace_log_lvl(task, regs, sp, bp, log_lvl); } @@ -443,7 +443,6 @@ void show_registers(struct pt_regs *regs) printk("Stack: "); show_stack_log_lvl(NULL, regs, (unsigned long *)sp, regs->bp, ""); - printk("\n"); printk(KERN_EMERG "Code: "); -- cgit v0.10.2 From dd0078f4f04d939950a792c493d7d97d7ce663b8 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 30 Jul 2008 14:20:54 -0400 Subject: rcu: just rename call_rcu_bh instead of making it a macro Seems that I found a box that has a config that passes call_rcu_bh as a function pointer (see net/sctp/sm_make_chunk.c), so declaring the call_rcu_bh has a macro function isn't good enough. This patch makes it just another name of call_rcu for rcupreempt. Signed-off-by: Steven Rostedt Reviewed-by: "Paul E. McKenney" Signed-off-by: Ingo Molnar diff --git a/include/linux/rcupreempt.h b/include/linux/rcupreempt.h index addb5e2..3e05c09 100644 --- a/include/linux/rcupreempt.h +++ b/include/linux/rcupreempt.h @@ -57,7 +57,13 @@ static inline void rcu_qsctr_inc(int cpu) rdssp->sched_qs++; } #define rcu_bh_qsctr_inc(cpu) -#define call_rcu_bh(head, rcu) call_rcu(head, rcu) + +/* + * Someone might want to pass call_rcu_bh as a function pointer. + * So this needs to just be a rename and not a macro function. + * (no parentheses) + */ +#define call_rcu_bh call_rcu /** * call_rcu_sched - Queue RCU callback for invocation after sched grace period. -- cgit v0.10.2 From 5802294f1b1895ee19a3d0ae72805da453afb9de Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 30 Jul 2008 14:20:55 -0400 Subject: rcu: trace fix possible mem-leak In the initialization of the RCU trace module, if rcupreempt_debugfs_init() fails, we never free the the trace buffer. This patch frees the trace buffer in case the debugfs fails. Signed-off-by: Steven Rostedt Reviewed-by: "Paul E. McKenney" Signed-off-by: Ingo Molnar diff --git a/kernel/rcupreempt_trace.c b/kernel/rcupreempt_trace.c index 5edf82c..35c2d33 100644 --- a/kernel/rcupreempt_trace.c +++ b/kernel/rcupreempt_trace.c @@ -308,11 +308,16 @@ out: static int __init rcupreempt_trace_init(void) { + int ret; + mutex_init(&rcupreempt_trace_mutex); rcupreempt_trace_buf = kmalloc(RCUPREEMPT_TRACE_BUF_SIZE, GFP_KERNEL); if (!rcupreempt_trace_buf) return 1; - return rcupreempt_debugfs_init(); + ret = rcupreempt_debugfs_init(); + if (ret) + kfree(rcupreempt_trace_buf); + return ret; } static void __exit rcupreempt_trace_cleanup(void) -- cgit v0.10.2 From 48e2bd56b1d1ae4b95fb21be778927b64d5c4235 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Sat, 2 Aug 2008 12:50:37 -0300 Subject: x86: coding style fixes to arch/x86/kernel/traps_64.c Fix coding style of traps_64.c with improvements suggested by Ingo. Signed-off-by: Gustavo F. Padovan Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index fe36d96..5df5d2e 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -84,8 +84,8 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) void printk_address(unsigned long address, int reliable) { - printk(" [<%016lx>] %s%pS\n", address, reliable ? - "" : "? ", (void *) address); + printk(" [<%016lx>] %s%pS\n", + address, reliable ? "" : "? ", (void *) address); } static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, @@ -98,8 +98,8 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, [STACKFAULT_STACK - 1] = "#SS", [MCE_STACK - 1] = "#MC", #if DEBUG_STKSZ > EXCEPTION_STKSZ - [N_EXCEPTION_STACKS ... N_EXCEPTION_STACKS + DEBUG_STKSZ / - EXCEPTION_STKSZ - 2] = "#DB[?]" + [N_EXCEPTION_STACKS ... + N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]" #endif }; unsigned k; @@ -363,8 +363,10 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, unsigned long *irqstack = (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE); - /* debugging aid: "show_stack(NULL, NULL);" prints the - back trace for this cpu. */ + /* + * debugging aid: "show_stack(NULL, NULL);" prints the + * back trace for this cpu. + */ if (sp == NULL) { if (task) -- cgit v0.10.2 From f88f07e0f0fd6376e081b10930d272a08fbf082f Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Thu, 14 Aug 2008 16:58:15 -0400 Subject: x86: alternatives : fix LOCK_PREFIX race with preemptible kernel and CPU hotplug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If a kernel thread is preempted in single-cpu mode right after the NOP (nop about to be turned into a lock prefix), then we CPU hotplug a CPU, and then the thread is scheduled back again, a SMP-unsafe atomic operation will be used on shared SMP variables, leading to corruption. No corruption would happen in the reverse case : going from SMP to UP is ok because we split a bit instruction into tiny pieces, which does not present this condition. Changing the 0x90 (single-byte nop) currently used into a 0x3E DS segment override prefix should fix this issue. Since the default of the atomic instructions is to use the DS segment anyway, it should not affect the behavior. The exception to this are references that use ESP/RSP and EBP/RBP as the base register (they will use the SS segment), however, in Linux (a) DS == SS at all times, and (b) we do not distinguish between segment violations reported as #SS as opposed to #GP, so there is no need to disassemble the instruction to figure out the suitable segment. This patch assumes that the 0x3E prefix will leave atomic operations as-is (thus assuming they normally touch data in the DS segment). Since there seem to be no obvious ill-use of other segment override prefixes for atomic operations, it should be safe. It can be verified with a quick grep -r LOCK_PREFIX include/asm-x86/ grep -A 1 -r LOCK_PREFIX arch/x86/ Taken from This source : AMD64 Architecture Programmer's Manual Volume 3: General-Purpose and System Instructions States "Instructions that Reference a Non-Stack Segment—If an instruction encoding references any base register other than rBP or rSP, or if an instruction contains an immediate offset, the default segment is the data segment (DS). These instructions can use the segment-override prefix to select one of the non-default segments, as shown in Table 1-5." Therefore, forcing the DS segment on the atomic operations, which already use the DS segment, should not change. This source : http://wiki.osdev.org/X86_Instruction_Encoding States "In 64-bit the CS, SS, DS and ES segment overrides are ignored." Confirmed by "AMD 64-Bit Technology" A.7 http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/x86-64_overview.pdf "In 64-bit mode, the DS, ES, SS and CS segment-override prefixes have no effect. These four prefixes are no longer treated as segment-override prefixes in the context of multipleprefix rules. Instead, they are treated as null prefixes." This patch applies to 2.6.27-rc2, but would also have to be applied to earlier kernels (2.6.26, 2.6.25, ...). Performance impact of the fix : tests done on "xaddq" and "xaddl" shows it actually improves performances on Intel Xeon, AMD64, Pentium M. It does not change the performance on Pentium II, Pentium 3 and Pentium 4. Xeon E5405 2.0GHz : NR_TESTS 10000000 test empty cycles : 162207948 test test 1-byte nop xadd cycles : 170755422 test test DS override prefix xadd cycles : 170000118 * test test LOCK xadd cycles : 472012134 AMD64 2.0GHz : NR_TESTS 10000000 test empty cycles : 146674549 test test 1-byte nop xadd cycles : 150273860 test test DS override prefix xadd cycles : 149982382 * test test LOCK xadd cycles : 270000690 Pentium 4 3.0GHz NR_TESTS 10000000 test empty cycles : 290001195 test test 1-byte nop xadd cycles : 310000560 test test DS override prefix xadd cycles : 310000575 * test test LOCK xadd cycles : 1050103740 Pentium M 2.0GHz NR_TESTS 10000000 test empty cycles : 180000523 test test 1-byte nop xadd cycles : 320000345 test test DS override prefix xadd cycles : 310000374 * test test LOCK xadd cycles : 480000357 Pentium 3 550MHz NR_TESTS 10000000 test empty cycles : 510000231 test test 1-byte nop xadd cycles : 620000128 test test DS override prefix xadd cycles : 620000110 * test test LOCK xadd cycles : 800000088 Pentium II 350MHz NR_TESTS 10000000 test empty cycles : 200833494 test test 1-byte nop xadd cycles : 340000130 test test DS override prefix xadd cycles : 340000126 * test test LOCK xadd cycles : 530000078 Speed test modules can be found at http://ltt.polymtl.ca/svn/trunk/tests/kernel/test-prefix-speed-32.c http://ltt.polymtl.ca/svn/trunk/tests/kernel/test-prefix-speed.c Macro-benchmarks 2.0GHz E5405 Core 2 dual Quad-Core Xeon Summary * replace smp lock prefixes with DS segment selector prefixes no lock prefix (s) with lock prefix (s) Speedup make -j1 kernel/ 33.94 +/- 0.07 34.91 +/- 0.27 2.8 % hackbench 50 2.99 +/- 0.01 3.74 +/- 0.01 25.1 % * replace smp lock prefixes with 0x90 nops no lock prefix (s) with lock prefix (s) Speedup make -j1 kernel/ 34.16 +/- 0.32 34.91 +/- 0.27 2.2 % hackbench 50 3.00 +/- 0.01 3.74 +/- 0.01 24.7 % Detail : 1 CPU, replace smp lock prefixes with DS segment selector prefixes make -j1 kernel/ real 0m34.067s user 0m30.630s sys 0m2.980s real 0m33.867s user 0m30.582s sys 0m3.024s real 0m33.939s user 0m30.738s sys 0m2.876s real 0m33.913s user 0m30.806s sys 0m2.808s avg : 33.94s std. dev. : 0.07s hackbench 50 Time: 2.978 Time: 2.982 Time: 3.010 Time: 2.984 Time: 2.982 avg : 2.99 std. dev. : 0.01 1 CPU, noreplace-smp make -j1 kernel/ real 0m35.326s user 0m30.630s sys 0m3.260s real 0m34.325s user 0m30.802s sys 0m3.084s real 0m35.568s user 0m30.722s sys 0m3.168s real 0m34.435s user 0m30.886s sys 0m2.996s avg.: 34.91s std. dev. : 0.27s hackbench 50 Time: 3.733 Time: 3.750 Time: 3.761 Time: 3.737 Time: 3.741 avg : 3.74 std. dev. : 0.01 1 CPU, replace smp lock prefixes with 0x90 nops make -j1 kernel/ real 0m34.139s user 0m30.782s sys 0m2.820s real 0m34.010s user 0m30.630s sys 0m2.976s real 0m34.777s user 0m30.658s sys 0m2.916s real 0m33.924s user 0m30.634s sys 0m2.924s real 0m33.962s user 0m30.774s sys 0m2.800s real 0m34.141s user 0m30.770s sys 0m2.828s avg : 34.16 std. dev. : 0.32 hackbench 50 Time: 2.999 Time: 2.994 Time: 3.004 Time: 2.991 Time: 2.988 avg : 3.00 std. dev. : 0.01 I did more runs (20 runs of each) to compare the nop case to the DS prefix case. Results in seconds. They actually does not seems to show a significant difference. NOP 34.155 33.955 34.012 35.299 35.679 34.141 33.995 35.016 34.254 33.957 33.957 34.008 35.013 34.494 33.893 34.295 34.314 34.854 33.991 34.132 DS 34.080 34.304 34.374 35.095 34.291 34.135 33.940 34.208 35.276 34.288 33.861 33.898 34.610 34.709 33.851 34.256 35.161 34.283 33.865 35.078 Used http://www.graphpad.com/quickcalcs/ttest1.cfm?Format=C to do the T-test (yeah, I'm lazy) : Group Group One (DS prefix) Group Two (nops) Mean 34.37815 34.37070 SD 0.46108 0.51905 SEM 0.10310 0.11606 N 20 20 P value and statistical significance: The two-tailed P value equals 0.9620 By conventional criteria, this difference is considered to be not statistically significant. Confidence interval: The mean of Group One minus Group Two equals 0.00745 95% confidence interval of this difference: From -0.30682 to 0.32172 Intermediate values used in calculations: t = 0.0480 df = 38 standard error of difference = 0.155 So, unless these calculus are completely bogus, the difference between the nop and the DS case seems not to be statistically significant. Signed-off-by: Mathieu Desnoyers Acked-by: H. Peter Anvin CC: Linus Torvalds CC: Jeremy Fitzhardinge CC: Roland McGrath CC: Ingo Molnar Cc: Steven Rostedt CC: Steven Rostedt CC: Thomas Gleixner CC: Peter Zijlstra CC: Andrew Morton CC: David Miller CC: Ulrich Drepper CC: Rusty Russell CC: Gregory Haskins CC: Arnaldo Carvalho de Melo CC: "Luis Claudio R. Goncalves" CC: Clark Williams CC: Christoph Lameter CC: Andi Kleen CC: Harvey Harrison Signed-off-by: H. Peter Anvin diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 2763cb3..7ead11f 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -241,25 +241,25 @@ static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end) continue; if (*ptr > text_end) continue; - text_poke(*ptr, ((unsigned char []){0xf0}), 1); /* add lock prefix */ + /* turn DS segment override prefix into lock prefix */ + text_poke(*ptr, ((unsigned char []){0xf0}), 1); }; } static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end) { u8 **ptr; - char insn[1]; if (noreplace_smp) return; - add_nops(insn, 1); for (ptr = start; ptr < end; ptr++) { if (*ptr < text) continue; if (*ptr > text_end) continue; - text_poke(*ptr, insn, 1); + /* turn lock prefix into DS segment override prefix */ + text_poke(*ptr, ((unsigned char []){0x3E}), 1); }; } -- cgit v0.10.2 From 1f49a2c2aeb22d5abc6d4ea574ff63d37ca55fbe Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 15 Aug 2008 12:45:09 -0400 Subject: x86: revert replace LOCK_PREFIX in futex.h Since we now use DS prefixes instead of NOP to remove LOCK prefixes, there are no longer any issues with instruction boundaries moving around. Depends on : x86 alternatives : fix LOCK_PREFIX race with preemptible kernel and CPU hotplug On Thu, 14 Aug 2008, Mathieu Desnoyers wrote: > > Changing the 0x90 (single-byte nop) currently used into a 0x3E DS segment > override prefix should fix this issue. Since the default of the atomic > instructions is to use the DS segment anyway, it should not affect the > behavior. Ok, so I think this is an _excellent_ patch, but I'd like to also then use LOCK_PREFIX in include/asm-x86/futex.h. See commit 9d55b9923a1b7ea8193b8875c57ec940dc2ff027. Linus Applies to 2.6.27-rc2 (and -rc3 unless hell broke loose in futex.h between rc2 and rc3). Signed-off-by: Mathieu Desnoyers CC: Linus Torvalds CC: H. Peter Anvin CC: Jeremy Fitzhardinge CC: Roland McGrath CC: Ingo Molnar Cc: Steven Rostedt CC: Steven Rostedt CC: Thomas Gleixner CC: Peter Zijlstra CC: Andrew Morton CC: David Miller CC: Ulrich Drepper CC: Rusty Russell CC: Gregory Haskins CC: Arnaldo Carvalho de Melo CC: "Luis Claudio R. Goncalves" CC: Clark Williams CC: Christoph Lameter CC: Andi Kleen CC: Harvey Harrison Signed-off-by: H. Peter Anvin diff --git a/include/asm-x86/futex.h b/include/asm-x86/futex.h index e7a76b3..d1b988c 100644 --- a/include/asm-x86/futex.h +++ b/include/asm-x86/futex.h @@ -25,7 +25,7 @@ asm volatile("1:\tmovl %2, %0\n" \ "\tmovl\t%0, %3\n" \ "\t" insn "\n" \ - "2:\tlock; cmpxchgl %3, %2\n" \ + "2:\t" LOCK_PREFIX "cmpxchgl %3, %2\n" \ "\tjnz\t1b\n" \ "3:\t.section .fixup,\"ax\"\n" \ "4:\tmov\t%5, %1\n" \ @@ -64,7 +64,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) __futex_atomic_op1("xchgl %0, %2", ret, oldval, uaddr, oparg); break; case FUTEX_OP_ADD: - __futex_atomic_op1("lock; xaddl %0, %2", ret, oldval, + __futex_atomic_op1(LOCK_PREFIX "xaddl %0, %2", ret, oldval, uaddr, oparg); break; case FUTEX_OP_OR: @@ -122,7 +122,7 @@ static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) return -EFAULT; - asm volatile("1:\tlock; cmpxchgl %3, %1\n" + asm volatile("1:\t" LOCK_PREFIX "cmpxchgl %3, %1\n" "2:\t.section .fixup, \"ax\"\n" "3:\tmov %2, %0\n" "\tjmp 2b\n" -- cgit v0.10.2 From 5bbd4c3724008c93cf3efdfc38a3402e245ab506 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 15 Aug 2008 12:56:59 -0400 Subject: x86: spinlock use LOCK_PREFIX Since we are now using DS prefixes instead of NOP to remove LOCK prefixes, there is no longer any problems with instruction boundaries moving around. * Linus Torvalds (torvalds@linux-foundation.org) wrote: > > > On Thu, 14 Aug 2008, Mathieu Desnoyers wrote: > > > > Changing the 0x90 (single-byte nop) currently used into a 0x3E DS segment > > override prefix should fix this issue. Since the default of the atomic > > instructions is to use the DS segment anyway, it should not affect the > > behavior. > > Ok, so I think this is an _excellent_ patch, but I'd like to also then use > LOCK_PREFIX in include/asm-x86/futex.h. > > See commit 9d55b9923a1b7ea8193b8875c57ec940dc2ff027. > > Linus Unless there a rationale for this, I think these be changed to LOCK_PREFIX too. grep "lock ;" include/asm-x86/spinlock.h "lock ; cmpxchgw %w1,%2\n\t" asm volatile("lock ; xaddl %0, %1\n" "lock ; cmpxchgl %1,%2\n\t" Applies to 2.6.27-rc2. Signed-off-by: Mathieu Desnoyers Acked-by: Linus Torvalds CC: Linus Torvalds CC: H. Peter Anvin CC: Jeremy Fitzhardinge CC: Roland McGrath CC: Ingo Molnar Cc: Steven Rostedt CC: Steven Rostedt CC: Thomas Gleixner CC: Peter Zijlstra CC: Andrew Morton CC: David Miller CC: Ulrich Drepper CC: Rusty Russell CC: Gregory Haskins CC: Arnaldo Carvalho de Melo CC: "Luis Claudio R. Goncalves" CC: Clark Williams CC: Christoph Lameter CC: Andi Kleen CC: Harvey Harrison Signed-off-by: H. Peter Anvin diff --git a/include/asm-x86/spinlock.h b/include/asm-x86/spinlock.h index 4f9a986..0b4d59a 100644 --- a/include/asm-x86/spinlock.h +++ b/include/asm-x86/spinlock.h @@ -97,7 +97,7 @@ static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock) "jne 1f\n\t" "movw %w0,%w1\n\t" "incb %h1\n\t" - "lock ; cmpxchgw %w1,%2\n\t" + LOCK_PREFIX "cmpxchgw %w1,%2\n\t" "1:" "sete %b1\n\t" "movzbl %b1,%0\n\t" @@ -135,7 +135,7 @@ static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock) int inc = 0x00010000; int tmp; - asm volatile("lock ; xaddl %0, %1\n" + asm volatile(LOCK_PREFIX "xaddl %0, %1\n" "movzwl %w0, %2\n\t" "shrl $16, %0\n\t" "1:\t" @@ -162,7 +162,7 @@ static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock) "cmpl %0,%1\n\t" "jne 1f\n\t" "addl $0x00010000, %1\n\t" - "lock ; cmpxchgl %1,%2\n\t" + LOCK_PREFIX "cmpxchgl %1,%2\n\t" "1:" "sete %b1\n\t" "movzbl %b1,%0\n\t" -- cgit v0.10.2 From 6f6da97faf29f87b3980a6992fb8cab44b4c444d Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 15 Aug 2008 23:05:19 +0400 Subject: x86: apic - sync_Arb_IDs style fixup No changes on binary level Signed-off-by: Cyrill Gorcunov Acked-by: Maciej W. Rozycki Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index d074889..60575c0 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -909,13 +909,15 @@ void __init sync_Arb_IDs(void) */ if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD) return; + /* * Wait for idle. */ apic_wait_icr_idle(); apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); - apic_write(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | APIC_DM_INIT); + apic_write(APIC_ICR, APIC_DEST_ALLINC | + APIC_INT_LEVELTRIG | APIC_DM_INIT); } /* diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 41aff34..72e94ab 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -755,7 +755,8 @@ void __init sync_Arb_IDs(void) apic_wait_icr_idle(); apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); - apic_write(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | APIC_DM_INIT); + apic_write(APIC_ICR, APIC_DEST_ALLINC | + APIC_INT_LEVELTRIG | APIC_DM_INIT); } /* -- cgit v0.10.2 From 638c0411922540deaf8797cacf73513b17618405 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 15 Aug 2008 23:05:18 +0400 Subject: x86: apic - unify init_bsp_APIC - remove redundant read of APIC_LVR register in 64bit mode - APIC is always integrated for 64bit mode so gcc will eliminate lapic_is_integrated call Signed-off-by: Cyrill Gorcunov Acked-by: Maciej W. Rozycki Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 60575c0..16d9721 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -925,7 +925,7 @@ void __init sync_Arb_IDs(void) */ void __init init_bsp_APIC(void) { - unsigned long value; + unsigned int value; /* * Don't do the setup now if we have a SMP BIOS as the @@ -946,11 +946,13 @@ void __init init_bsp_APIC(void) value &= ~APIC_VECTOR_MASK; value |= APIC_SPIV_APIC_ENABLED; +#ifdef CONFIG_X86_32 /* This bit is reserved on P4/Xeon and should be cleared */ if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 == 15)) value &= ~APIC_SPIV_FOCUS_DISABLED; else +#endif value |= APIC_SPIV_FOCUS_DISABLED; value |= SPURIOUS_APIC_VECTOR; apic_write(APIC_SPIV, value); diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 72e94ab..99d18b8 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -773,8 +773,6 @@ void __init init_bsp_APIC(void) if (smp_found_config || !cpu_has_apic) return; - value = apic_read(APIC_LVR); - /* * Do not trust the local APIC being empty at bootup. */ @@ -786,7 +784,15 @@ void __init init_bsp_APIC(void) value = apic_read(APIC_SPIV); value &= ~APIC_VECTOR_MASK; value |= APIC_SPIV_APIC_ENABLED; - value |= APIC_SPIV_FOCUS_DISABLED; + +#ifdef CONFIG_X86_32 + /* This bit is reserved on P4/Xeon and should be cleared */ + if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && + (boot_cpu_data.x86 == 15)) + value &= ~APIC_SPIV_FOCUS_DISABLED; + else +#endif + value |= APIC_SPIV_FOCUS_DISABLED; value |= SPURIOUS_APIC_VECTOR; apic_write(APIC_SPIV, value); @@ -795,6 +801,8 @@ void __init init_bsp_APIC(void) */ apic_write(APIC_LVT0, APIC_DM_EXTINT); value = APIC_DM_NMI; + if (!lapic_is_integrated()) /* 82489DX */ + value |= APIC_LVT_LEVEL_TRIGGER; apic_write(APIC_LVT1, value); } -- cgit v0.10.2 From 6d341675f8e715464849e5d5563a72c1d39e800d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Va=C5=A1ut?= Date: Sat, 16 Aug 2008 15:35:57 +0100 Subject: [ARM] 5199/1: PalmLD: PCMCIA driver PCMCIA driver for Palm LifeDrive Signed-off-by: Marek Vasut Signed-off-by: Russell King diff --git a/drivers/pcmcia/Makefile b/drivers/pcmcia/Makefile index 269a9e9..c6d89fd 100644 --- a/drivers/pcmcia/Makefile +++ b/drivers/pcmcia/Makefile @@ -72,5 +72,6 @@ pxa2xx_cs-$(CONFIG_ARCH_LUBBOCK) += pxa2xx_lubbock.o sa1111_generic.o pxa2xx_cs-$(CONFIG_MACH_MAINSTONE) += pxa2xx_mainstone.o pxa2xx_cs-$(CONFIG_PXA_SHARPSL) += pxa2xx_sharpsl.o pxa2xx_cs-$(CONFIG_MACH_ARMCORE) += pxa2xx_cm_x270.o -pxa2xx_cs-$(CONFIG_MACH_PALMTX) += pxa2xx_palmtx.o +pxa2xx_cs-$(CONFIG_MACH_PALMTX) += pxa2xx_palmtx.o +pxa2xx_cs-$(CONFIG_MACH_PALMLD) += pxa2xx_palmld.o diff --git a/drivers/pcmcia/pxa2xx_palmld.c b/drivers/pcmcia/pxa2xx_palmld.c new file mode 100644 index 0000000..1736c67 --- /dev/null +++ b/drivers/pcmcia/pxa2xx_palmld.c @@ -0,0 +1,151 @@ +/* + * linux/drivers/pcmcia/pxa2xx_palmld.c + * + * Driver for Palm LifeDrive PCMCIA + * + * Copyright (C) 2006 Alex Osborne + * Copyright (C) 2007-2008 Marek Vasut + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include +#include +#include + +#include +#include +#include "soc_common.h" + +static int palmld_pcmcia_hw_init(struct soc_pcmcia_socket *skt) +{ + int ret; + + ret = gpio_request(GPIO_NR_PALMLD_PCMCIA_POWER, "PCMCIA PWR"); + if (ret) + goto err1; + ret = gpio_direction_output(GPIO_NR_PALMLD_PCMCIA_POWER, 0); + if (ret) + goto err2; + + ret = gpio_request(GPIO_NR_PALMLD_PCMCIA_RESET, "PCMCIA RST"); + if (ret) + goto err2; + ret = gpio_direction_output(GPIO_NR_PALMLD_PCMCIA_RESET, 1); + if (ret) + goto err3; + + ret = gpio_request(GPIO_NR_PALMLD_PCMCIA_READY, "PCMCIA RDY"); + if (ret) + goto err3; + ret = gpio_direction_input(GPIO_NR_PALMLD_PCMCIA_READY); + if (ret) + goto err4; + + skt->irq = IRQ_GPIO(GPIO_NR_PALMLD_PCMCIA_READY); + return 0; + +err4: + gpio_free(GPIO_NR_PALMLD_PCMCIA_READY); +err3: + gpio_free(GPIO_NR_PALMLD_PCMCIA_RESET); +err2: + gpio_free(GPIO_NR_PALMLD_PCMCIA_POWER); +err1: + return ret; +} + +static void palmld_pcmcia_hw_shutdown(struct soc_pcmcia_socket *skt) +{ + gpio_free(GPIO_NR_PALMLD_PCMCIA_READY); + gpio_free(GPIO_NR_PALMLD_PCMCIA_RESET); + gpio_free(GPIO_NR_PALMLD_PCMCIA_POWER); +} + +static void palmld_pcmcia_socket_state(struct soc_pcmcia_socket *skt, + struct pcmcia_state *state) +{ + state->detect = 1; /* always inserted */ + state->ready = !!gpio_get_value(GPIO_NR_PALMLD_PCMCIA_READY); + state->bvd1 = 1; + state->bvd2 = 1; + state->wrprot = 0; + state->vs_3v = 1; + state->vs_Xv = 0; +} + +static int palmld_pcmcia_configure_socket(struct soc_pcmcia_socket *skt, + const socket_state_t *state) +{ + gpio_set_value(GPIO_NR_PALMLD_PCMCIA_POWER, 1); + gpio_set_value(GPIO_NR_PALMLD_PCMCIA_RESET, + !!(state->flags & SS_RESET)); + + return 0; +} + +static void palmld_pcmcia_socket_init(struct soc_pcmcia_socket *skt) +{ +} + +static void palmld_pcmcia_socket_suspend(struct soc_pcmcia_socket *skt) +{ +} + +static struct pcmcia_low_level palmld_pcmcia_ops = { + .owner = THIS_MODULE, + + .first = 0, + .nr = 2, + + .hw_init = palmld_pcmcia_hw_init, + .hw_shutdown = palmld_pcmcia_hw_shutdown, + + .socket_state = palmld_pcmcia_socket_state, + .configure_socket = palmld_pcmcia_configure_socket, + + .socket_init = palmld_pcmcia_socket_init, + .socket_suspend = palmld_pcmcia_socket_suspend, +}; + +static struct platform_device *palmld_pcmcia_device; + +static int __init palmld_pcmcia_init(void) +{ + int ret; + + if (!machine_is_palmld()) + return -ENODEV; + + palmld_pcmcia_device = platform_device_alloc("pxa2xx-pcmcia", -1); + if (!palmld_pcmcia_device) + return -ENOMEM; + + ret = platform_device_add_data(palmld_pcmcia_device, &palmld_pcmcia_ops, + sizeof(palmld_pcmcia_ops)); + + if (!ret) + ret = platform_device_add(palmld_pcmcia_device); + + if (ret) + platform_device_put(palmld_pcmcia_device); + + return ret; +} + +static void __exit palmld_pcmcia_exit(void) +{ + platform_device_unregister(palmld_pcmcia_device); +} + +module_init(palmld_pcmcia_init); +module_exit(palmld_pcmcia_exit); + +MODULE_AUTHOR("Alex Osborne ," + " Marek Vasut "); +MODULE_DESCRIPTION("PCMCIA support for Palm LifeDrive"); +MODULE_ALIAS("platform:pxa2xx-pcmcia"); +MODULE_LICENSE("GPL"); -- cgit v0.10.2 From 6764014bc8bb4849f6a4f336477e873ad5861ed2 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 16 Aug 2008 23:21:50 +0400 Subject: x86: apic - unify clear_local_APIC - Remove redundant masking of APIC_LVTTHMR register in apic_32.c - Add masking of APIC_LVTTHMR register to apic_64.c. We use a bit complicated #ifdef here: CONFIG_X86_MCE_P4THERMAL is 32bit specific and X86_MCE_INTEL is 64bit specific so the appropriate config variable will be set by Kconfig. - the APIC_ESR register clearing in apic_64.c now uses not straightforward way but this is allowed tradeoff. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 16d9721..3131603 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -754,7 +754,7 @@ void clear_local_APIC(void) } /* lets not touch this if we didn't frob it */ -#ifdef CONFIG_X86_MCE_P4THERMAL +#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(X86_MCE_INTEL) if (maxlvt >= 5) { v = apic_read(APIC_LVTTHMR); apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED); @@ -771,10 +771,6 @@ void clear_local_APIC(void) if (maxlvt >= 4) apic_write(APIC_LVTPC, APIC_LVT_MASKED); -#ifdef CONFIG_X86_MCE_P4THERMAL - if (maxlvt >= 5) - apic_write(APIC_LVTTHMR, APIC_LVT_MASKED); -#endif /* Integrated APIC (!82489DX) ? */ if (lapic_is_integrated()) { if (maxlvt > 3) diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 99d18b8..d834b75 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -630,6 +630,13 @@ void clear_local_APIC(void) apic_write(APIC_LVTPC, v | APIC_LVT_MASKED); } + /* lets not touch this if we didn't frob it */ +#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(X86_MCE_INTEL) + if (maxlvt >= 5) { + v = apic_read(APIC_LVTTHMR); + apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED); + } +#endif /* * Clean APIC state for other OSs: */ @@ -640,8 +647,14 @@ void clear_local_APIC(void) apic_write(APIC_LVTERR, APIC_LVT_MASKED); if (maxlvt >= 4) apic_write(APIC_LVTPC, APIC_LVT_MASKED); - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); + + /* Integrated APIC (!82489DX) ? */ + if (lapic_is_integrated()) { + if (maxlvt > 3) + /* Clear ESR due to Pentium errata 3AP and 11AP */ + apic_write(APIC_ESR, 0); + apic_read(APIC_ESR); + } } /** -- cgit v0.10.2 From 92206c909ad1d4f9c355b4842722d5a355d6b76b Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 16 Aug 2008 23:21:51 +0400 Subject: x86: apic - unify lapic_resume Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 3131603..3d40213 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1606,16 +1606,21 @@ static int lapic_resume(struct sys_device *dev) local_irq_save(flags); - /* - * Make sure the APICBASE points to the right address - * - * FIXME! This will be wrong if we ever support suspend on - * SMP! We'll need to do this as part of the CPU restore! - */ - rdmsr(MSR_IA32_APICBASE, l, h); - l &= ~MSR_IA32_APICBASE_BASE; - l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; - wrmsr(MSR_IA32_APICBASE, l, h); +#ifdef CONFIG_X86_64 + if (x2apic) + enable_x2apic(); + else +#endif + /* + * Make sure the APICBASE points to the right address + * + * FIXME! This will be wrong if we ever support suspend on + * SMP! We'll need to do this as part of the CPU restore! + */ + rdmsr(MSR_IA32_APICBASE, l, h); + l &= ~MSR_IA32_APICBASE_BASE; + l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; + wrmsr(MSR_IA32_APICBASE, l, h); apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); apic_write(APIC_ID, apic_pm_state.apic_id); @@ -1625,7 +1630,7 @@ static int lapic_resume(struct sys_device *dev) apic_write(APIC_SPIV, apic_pm_state.apic_spiv); apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); -#ifdef CONFIG_X86_MCE_P4THERMAL +#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) if (maxlvt >= 5) apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); #endif @@ -1639,7 +1644,9 @@ static int lapic_resume(struct sys_device *dev) apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); apic_write(APIC_ESR, 0); apic_read(APIC_ESR); + local_irq_restore(flags); + return 0; } diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index d834b75..e542a2d 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1412,13 +1412,22 @@ static int lapic_resume(struct sys_device *dev) maxlvt = lapic_get_maxlvt(); local_irq_save(flags); - if (!x2apic) { + +#ifdef CONFIG_X86_64 + if (x2apic) + enable_x2apic(); + else +#endif + /* + * Make sure the APICBASE points to the right address + * + * FIXME! This will be wrong if we ever support suspend on + * SMP! We'll need to do this as part of the CPU restore! + */ rdmsr(MSR_IA32_APICBASE, l, h); l &= ~MSR_IA32_APICBASE_BASE; l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; wrmsr(MSR_IA32_APICBASE, l, h); - } else - enable_x2apic(); apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); apic_write(APIC_ID, apic_pm_state.apic_id); @@ -1428,7 +1437,7 @@ static int lapic_resume(struct sys_device *dev) apic_write(APIC_SPIV, apic_pm_state.apic_spiv); apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); -#ifdef CONFIG_X86_MCE_INTEL +#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) if (maxlvt >= 5) apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); #endif @@ -1442,7 +1451,9 @@ static int lapic_resume(struct sys_device *dev) apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); apic_write(APIC_ESR, 0); apic_read(APIC_ESR); + local_irq_restore(flags); + return 0; } -- cgit v0.10.2 From 24968cfdd4c3cf61338495dd0f9bb8a6907d2087 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 16 Aug 2008 23:21:52 +0400 Subject: x86: apic - unify lapic_suspend Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 3d40213..6cb8aaa 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1582,7 +1582,7 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state) apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); apic_pm_state.apic_tmict = apic_read(APIC_TMICT); apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); -#ifdef CONFIG_X86_MCE_P4THERMAL +#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) if (maxlvt >= 5) apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); #endif diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index e542a2d..13dea93 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1390,10 +1390,11 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state) apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); apic_pm_state.apic_tmict = apic_read(APIC_TMICT); apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); -#ifdef CONFIG_X86_MCE_INTEL +#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) if (maxlvt >= 5) apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); #endif + local_irq_save(flags); disable_local_APIC(); local_irq_restore(flags); -- cgit v0.10.2 From 274cfe5912eebe9d4e1a4c451fe617289a181fcf Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 16 Aug 2008 23:21:53 +0400 Subject: x86: apic - rearrange functions and comments Rearrange functions and comments to find differences easier. Also use apic_printk in setup_boot_APIC_clock for 64bit mode. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 6cb8aaa..9e8702e 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -251,6 +251,9 @@ int lapic_get_maxlvt(void) * this function twice on the boot CPU, once with a bogus timeout * value, second time for real. The other (noncalibrating) CPUs * call this function only once, with the real, calibrated value. + * + * We do reads before writes even if unnecessary, to get around the + * P5 APIC double write bug. */ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) { @@ -280,6 +283,36 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) } /* + * Setup extended LVT, AMD specific (K8, family 10h) + * + * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and + * MCE interrupts are supported. Thus MCE offset must be set to 0. + */ + +#define APIC_EILVT_LVTOFF_MCE 0 +#define APIC_EILVT_LVTOFF_IBS 1 + +static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask) +{ + unsigned long reg = (lvt_off << 4) + APIC_EILVT0; + unsigned int v = (mask << 16) | (msg_type << 8) | vector; + + apic_write(reg, v); +} + +u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask) +{ + setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask); + return APIC_EILVT_LVTOFF_MCE; +} + +u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask) +{ + setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask); + return APIC_EILVT_LVTOFF_IBS; +} + +/* * Program the next event, relative to now */ static int lapic_next_event(unsigned long delta, @@ -298,7 +331,7 @@ static void lapic_timer_setup(enum clock_event_mode mode, unsigned long flags; unsigned int v; - /* Lapic used for broadcast ? */ + /* Lapic used as dummy for broadcast ? */ if (evt->features & CLOCK_EVT_FEAT_DUMMY) return; @@ -681,35 +714,6 @@ int setup_profiling_timer(unsigned int multiplier) } /* - * Setup extended LVT, AMD specific (K8, family 10h) - * - * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and - * MCE interrupts are supported. Thus MCE offset must be set to 0. - */ - -#define APIC_EILVT_LVTOFF_MCE 0 -#define APIC_EILVT_LVTOFF_IBS 1 - -static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask) -{ - unsigned long reg = (lvt_off << 4) + APIC_EILVT0; - unsigned int v = (mask << 16) | (msg_type << 8) | vector; - apic_write(reg, v); -} - -u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask) -{ - setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask); - return APIC_EILVT_LVTOFF_MCE; -} - -u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask) -{ - setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask); - return APIC_EILVT_LVTOFF_IBS; -} - -/* * Local APIC start and shutdown */ @@ -1542,6 +1546,11 @@ void __cpuinit generic_processor_info(int apicid, int version) #ifdef CONFIG_PM static struct { + /* + * 'active' is true if the local APIC was enabled by us and + * not the BIOS; this signifies that we are also responsible + * for disabling it before entering apm/acpi suspend + */ int active; /* r/w apic fields */ unsigned int apic_id; diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 13dea93..46523c0 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -81,6 +81,9 @@ static void lapic_timer_setup(enum clock_event_mode mode, static void lapic_timer_broadcast(cpumask_t mask); static void apic_pm_activate(void); +/* + * The local apic timer can be used for any function which is CPU local. + */ static struct clock_event_device lapic_clockevent = { .name = "lapic", .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT @@ -127,6 +130,11 @@ static int modern_apic(void) return lapic_get_version() >= 0x14; } +/* + * Paravirt kernels also might be using these below ops. So we still + * use generic apic_read()/apic_write(), which might be pointing to different + * ops in PARAVIRT case. + */ void xapic_wait_icr_idle(void) { while (apic_read(APIC_ICR) & APIC_ICR_BUSY) @@ -175,7 +183,6 @@ static struct apic_ops xapic_ops = { }; struct apic_ops __read_mostly *apic_ops = &xapic_ops; - EXPORT_SYMBOL_GPL(apic_ops); static void x2apic_wait_icr_idle(void) @@ -244,6 +251,10 @@ int lapic_get_maxlvt(void) return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2; } +/* + * Local APIC timer + */ + /* Clock divisor is set to 1 */ #define APIC_DIVISOR 1 @@ -257,7 +268,6 @@ int lapic_get_maxlvt(void) * We do reads before writes even if unnecessary, to get around the * P5 APIC double write bug. */ - static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) { unsigned int lvtt_value, tmp_value; @@ -474,10 +484,10 @@ static int __init calibrate_APIC_clock(void) void __init setup_boot_APIC_clock(void) { /* - * The local apic timer can be disabled via the kernel commandline. - * Register the lapic timer as a dummy clock event source on SMP - * systems, so the broadcast mechanism is used. On UP systems simply - * ignore it. + * The local apic timer can be disabled via the kernel + * commandline or from the CPU detection code. Register the lapic + * timer as a dummy clock event source on SMP systems, so the + * broadcast mechanism is used. On UP systems simply ignore it. */ if (disable_apic_timer) { printk(KERN_INFO "Disabling APIC timer\n"); @@ -489,7 +499,9 @@ void __init setup_boot_APIC_clock(void) return; } - printk(KERN_INFO "Using local APIC timer interrupts.\n"); + apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n" + "calibrating APIC timer ...\n"); + if (calibrate_APIC_clock()) { /* No broadcast on UP ! */ if (num_possible_cpus() > 1) @@ -508,6 +520,7 @@ void __init setup_boot_APIC_clock(void) printk(KERN_WARNING "APIC timer registered as dummy," " due to nmi_watchdog=%d!\n", nmi_watchdog); + /* Setup the lapic or request the broadcast */ setup_APIC_timer(); } @@ -577,6 +590,7 @@ void smp_apic_timer_interrupt(struct pt_regs *regs) irq_enter(); local_apic_timer_interrupt(); irq_exit(); + set_irq_regs(old_regs); } @@ -1248,6 +1262,13 @@ void __init connect_bsp_APIC(void) enable_apic_mode(); } +/** + * disconnect_bsp_APIC - detach the APIC from the interrupt system + * @virt_wire_setup: indicates, whether virtual wire mode is selected + * + * Virtual wire mode is necessary to deliver legacy interrupts even when the + * APIC is disabled. + */ void disconnect_bsp_APIC(int virt_wire_setup) { /* Go back to Virtual Wire compatibility mode */ @@ -1347,9 +1368,11 @@ int hard_smp_processor_id(void) #ifdef CONFIG_PM static struct { - /* 'active' is true if the local APIC was enabled by us and - not the BIOS; this signifies that we are also responsible - for disabling it before entering apm/acpi suspend */ + /* + * 'active' is true if the local APIC was enabled by us and + * not the BIOS; this signifies that we are also responsible + * for disabling it before entering apm/acpi suspend + */ int active; /* r/w apic fields */ unsigned int apic_id; @@ -1458,6 +1481,11 @@ static int lapic_resume(struct sys_device *dev) return 0; } +/* + * This device has no shutdown method - fully functioning local APICs + * are needed on every CPU up until machine_halt/restart/poweroff. + */ + static struct sysdev_class lapic_sysclass = { .name = "lapic", .resume = lapic_resume, -- cgit v0.10.2 From 9c803869f5f5e3d7ad94b2926474b2882e30073b Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 16 Aug 2008 23:21:54 +0400 Subject: x86: apic - unify lapic_is_integrated Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 9e8702e..41134d4 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -128,7 +128,11 @@ static inline int lapic_get_version(void) */ static inline int lapic_is_integrated(void) { +#ifdef CONFIG_X86_64 + return 1; +#else return APIC_INTEGRATED(lapic_get_version()); +#endif } /* diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 46523c0..18c0b8c 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -111,11 +111,15 @@ static inline int lapic_get_version(void) } /* - * Check, if the APIC is integrated or a seperate chip + * Check, if the APIC is integrated or a separate chip */ static inline int lapic_is_integrated(void) { +#ifdef CONFIG_X86_64 return 1; +#else + return APIC_INTEGRATED(lapic_get_version()); +#endif } /* -- cgit v0.10.2 From cf9768d7514d59b3179a886c4a47908d72e6cf76 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 16 Aug 2008 23:21:55 +0400 Subject: x86: apic - unify xapic_icr_read Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 18c0b8c..5e07381 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -174,7 +174,7 @@ u64 xapic_icr_read(void) icr2 = apic_read(APIC_ICR2); icr1 = apic_read(APIC_ICR); - return (icr1 | ((u64)icr2 << 32)); + return icr1 | ((u64)icr2 << 32); } static struct apic_ops xapic_ops = { -- cgit v0.10.2 From ccdffb9a88b2907b159538d7bfd6256621db4f84 Mon Sep 17 00:00:00 2001 From: Francois Romieu Date: Sat, 26 Jul 2008 14:26:06 +0200 Subject: r8169: get ethtool settings through the generic mii helper It avoids to report unsupported link capabilities with the fast-ethernet only 8101/8102. Signed-off-by: Francois Romieu Tested-by: Martin Capitanio Fixed-by: Ivan Vecera Cc: Edward Hsu diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c index a3e3895..dac2677 100644 --- a/drivers/net/r8169.c +++ b/drivers/net/r8169.c @@ -370,8 +370,9 @@ struct ring_info { }; enum features { - RTL_FEATURE_WOL = (1 << 0), - RTL_FEATURE_MSI = (1 << 1), + RTL_FEATURE_WOL = (1 << 0), + RTL_FEATURE_MSI = (1 << 1), + RTL_FEATURE_GMII = (1 << 2), }; struct rtl8169_private { @@ -406,13 +407,15 @@ struct rtl8169_private { struct vlan_group *vlgrp; #endif int (*set_speed)(struct net_device *, u8 autoneg, u16 speed, u8 duplex); - void (*get_settings)(struct net_device *, struct ethtool_cmd *); + int (*get_settings)(struct net_device *, struct ethtool_cmd *); void (*phy_reset_enable)(void __iomem *); void (*hw_start)(struct net_device *); unsigned int (*phy_reset_pending)(void __iomem *); unsigned int (*link_ok)(void __iomem *); struct delayed_work task; unsigned features; + + struct mii_if_info mii; }; MODULE_AUTHOR("Realtek and the Linux r8169 crew "); @@ -482,6 +485,23 @@ static int mdio_read(void __iomem *ioaddr, int reg_addr) return value; } +static void rtl_mdio_write(struct net_device *dev, int phy_id, int location, + int val) +{ + struct rtl8169_private *tp = netdev_priv(dev); + void __iomem *ioaddr = tp->mmio_addr; + + mdio_write(ioaddr, location, val); +} + +static int rtl_mdio_read(struct net_device *dev, int phy_id, int location) +{ + struct rtl8169_private *tp = netdev_priv(dev); + void __iomem *ioaddr = tp->mmio_addr; + + return mdio_read(ioaddr, location); +} + static void rtl8169_irq_mask_and_ack(void __iomem *ioaddr) { RTL_W16(IntrMask, 0x0000); @@ -850,7 +870,7 @@ static int rtl8169_rx_vlan_skb(struct rtl8169_private *tp, struct RxDesc *desc, #endif -static void rtl8169_gset_tbi(struct net_device *dev, struct ethtool_cmd *cmd) +static int rtl8169_gset_tbi(struct net_device *dev, struct ethtool_cmd *cmd) { struct rtl8169_private *tp = netdev_priv(dev); void __iomem *ioaddr = tp->mmio_addr; @@ -867,65 +887,29 @@ static void rtl8169_gset_tbi(struct net_device *dev, struct ethtool_cmd *cmd) cmd->speed = SPEED_1000; cmd->duplex = DUPLEX_FULL; /* Always set */ + + return 0; } -static void rtl8169_gset_xmii(struct net_device *dev, struct ethtool_cmd *cmd) +static int rtl8169_gset_xmii(struct net_device *dev, struct ethtool_cmd *cmd) { struct rtl8169_private *tp = netdev_priv(dev); - void __iomem *ioaddr = tp->mmio_addr; - u8 status; - - cmd->supported = SUPPORTED_10baseT_Half | - SUPPORTED_10baseT_Full | - SUPPORTED_100baseT_Half | - SUPPORTED_100baseT_Full | - SUPPORTED_1000baseT_Full | - SUPPORTED_Autoneg | - SUPPORTED_TP; - - cmd->autoneg = 1; - cmd->advertising = ADVERTISED_TP | ADVERTISED_Autoneg; - - if (tp->phy_auto_nego_reg & ADVERTISE_10HALF) - cmd->advertising |= ADVERTISED_10baseT_Half; - if (tp->phy_auto_nego_reg & ADVERTISE_10FULL) - cmd->advertising |= ADVERTISED_10baseT_Full; - if (tp->phy_auto_nego_reg & ADVERTISE_100HALF) - cmd->advertising |= ADVERTISED_100baseT_Half; - if (tp->phy_auto_nego_reg & ADVERTISE_100FULL) - cmd->advertising |= ADVERTISED_100baseT_Full; - if (tp->phy_1000_ctrl_reg & ADVERTISE_1000FULL) - cmd->advertising |= ADVERTISED_1000baseT_Full; - - status = RTL_R8(PHYstatus); - - if (status & _1000bpsF) - cmd->speed = SPEED_1000; - else if (status & _100bps) - cmd->speed = SPEED_100; - else if (status & _10bps) - cmd->speed = SPEED_10; - - if (status & TxFlowCtrl) - cmd->advertising |= ADVERTISED_Asym_Pause; - if (status & RxFlowCtrl) - cmd->advertising |= ADVERTISED_Pause; - - cmd->duplex = ((status & _1000bpsF) || (status & FullDup)) ? - DUPLEX_FULL : DUPLEX_HALF; + + return mii_ethtool_gset(&tp->mii, cmd); } static int rtl8169_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) { struct rtl8169_private *tp = netdev_priv(dev); unsigned long flags; + int rc; spin_lock_irqsave(&tp->lock, flags); - tp->get_settings(dev, cmd); + rc = tp->get_settings(dev, cmd); spin_unlock_irqrestore(&tp->lock, flags); - return 0; + return rc; } static void rtl8169_get_regs(struct net_device *dev, struct ethtool_regs *regs, @@ -1513,7 +1497,7 @@ static const struct rtl_cfg_info { unsigned int align; u16 intr_event; u16 napi_event; - unsigned msi; + unsigned features; } rtl_cfg_infos [] = { [RTL_CFG_0] = { .hw_start = rtl_hw_start_8169, @@ -1522,7 +1506,7 @@ static const struct rtl_cfg_info { .intr_event = SYSErr | LinkChg | RxOverflow | RxFIFOOver | TxErr | TxOK | RxOK | RxErr, .napi_event = RxFIFOOver | TxErr | TxOK | RxOK | RxOverflow, - .msi = 0 + .features = RTL_FEATURE_GMII }, [RTL_CFG_1] = { .hw_start = rtl_hw_start_8168, @@ -1531,7 +1515,7 @@ static const struct rtl_cfg_info { .intr_event = SYSErr | LinkChg | RxOverflow | TxErr | TxOK | RxOK | RxErr, .napi_event = TxErr | TxOK | RxOK | RxOverflow, - .msi = RTL_FEATURE_MSI + .features = RTL_FEATURE_GMII | RTL_FEATURE_MSI }, [RTL_CFG_2] = { .hw_start = rtl_hw_start_8101, @@ -1540,7 +1524,7 @@ static const struct rtl_cfg_info { .intr_event = SYSErr | LinkChg | RxOverflow | PCSTimeout | RxFIFOOver | TxErr | TxOK | RxOK | RxErr, .napi_event = RxFIFOOver | TxErr | TxOK | RxOK | RxOverflow, - .msi = RTL_FEATURE_MSI + .features = RTL_FEATURE_MSI } }; @@ -1552,7 +1536,7 @@ static unsigned rtl_try_msi(struct pci_dev *pdev, void __iomem *ioaddr, u8 cfg2; cfg2 = RTL_R8(Config2) & ~MSIEnable; - if (cfg->msi) { + if (cfg->features & RTL_FEATURE_MSI) { if (pci_enable_msi(pdev)) { dev_info(&pdev->dev, "no MSI. Back to INTx.\n"); } else { @@ -1578,6 +1562,7 @@ rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) const struct rtl_cfg_info *cfg = rtl_cfg_infos + ent->driver_data; const unsigned int region = cfg->region; struct rtl8169_private *tp; + struct mii_if_info *mii; struct net_device *dev; void __iomem *ioaddr; unsigned int i; @@ -1602,6 +1587,14 @@ rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) tp->pci_dev = pdev; tp->msg_enable = netif_msg_init(debug.msg_enable, R8169_MSG_DEFAULT); + mii = &tp->mii; + mii->dev = dev; + mii->mdio_read = rtl_mdio_read; + mii->mdio_write = rtl_mdio_write; + mii->phy_id_mask = 0x1f; + mii->reg_num_mask = 0x1f; + mii->supports_gmii = !!(cfg->features & RTL_FEATURE_GMII); + /* enable device (incl. PCI PM wakeup and hotplug setup) */ rc = pci_enable_device(pdev); if (rc < 0) { -- cgit v0.10.2 From 458a9f617adfb2fc5f38e7673339115c4ba3290f Mon Sep 17 00:00:00 2001 From: Francois Romieu Date: Sat, 2 Aug 2008 15:50:02 +0200 Subject: r8169: Tx performance tweak helper Signed-off-by: Francois Romieu Cc: Edward Hsu diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c index dac2677..26fa6e0 100644 --- a/drivers/net/r8169.c +++ b/drivers/net/r8169.c @@ -2054,12 +2054,20 @@ static void rtl_hw_start_8169(struct net_device *dev) RTL_W16(IntrMask, tp->intr_event); } +static void rtl_tx_performance_tweak(struct pci_dev *pdev, u8 force) +{ + u8 ctl; + + pci_read_config_byte(pdev, 0x69, &ctl); + ctl = (ctl & ~0x70) | force; + pci_write_config_byte(pdev, 0x69, ctl); +} + static void rtl_hw_start_8168(struct net_device *dev) { struct rtl8169_private *tp = netdev_priv(dev); void __iomem *ioaddr = tp->mmio_addr; struct pci_dev *pdev = tp->pci_dev; - u8 ctl; RTL_W8(Cfg9346, Cfg9346_Unlock); @@ -2073,10 +2081,7 @@ static void rtl_hw_start_8168(struct net_device *dev) RTL_W16(CPlusCmd, tp->cp_cmd); - /* Tx performance tweak. */ - pci_read_config_byte(pdev, 0x69, &ctl); - ctl = (ctl & ~0x70) | 0x50; - pci_write_config_byte(pdev, 0x69, ctl); + rtl_tx_performance_tweak(pdev, 0x50); RTL_W16(IntrMitigate, 0x5151); -- cgit v0.10.2 From 9c14ceafa5ca7f57225a43fb0785c56ddc7f1823 Mon Sep 17 00:00:00 2001 From: Francois Romieu Date: Sat, 5 Jul 2008 00:21:15 +0200 Subject: r8169: use pci_find_capability for the PCI-E features Signed-off-by: Francois Romieu Cc: Edward Hsu diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c index 26fa6e0..ae149a9 100644 --- a/drivers/net/r8169.c +++ b/drivers/net/r8169.c @@ -61,6 +61,7 @@ static const int multicast_filter_limit = 32; /* MAC address length */ #define MAC_ADDR_LEN 6 +#define MAX_READ_REQUEST_SHIFT 12 #define RX_FIFO_THRESH 7 /* 7 means NO threshold, Rx buffer level before first PCI xfer. */ #define RX_DMA_BURST 6 /* Maximum PCI burst, '6' is 1024 */ #define TX_DMA_BURST 6 /* Maximum PCI burst, '6' is 1024 */ @@ -412,6 +413,7 @@ struct rtl8169_private { void (*hw_start)(struct net_device *); unsigned int (*phy_reset_pending)(void __iomem *); unsigned int (*link_ok)(void __iomem *); + int pcie_cap; struct delayed_work task; unsigned features; @@ -1663,6 +1665,10 @@ rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_out_free_res_4; } + tp->pcie_cap = pci_find_capability(pdev, PCI_CAP_ID_EXP); + if (!tp->pcie_cap && netif_msg_probe(tp)) + dev_info(&pdev->dev, "no PCI Express capability\n"); + /* Unneeded ? Don't mess with Mrs. Murphy. */ rtl8169_irq_mask_and_ack(ioaddr); @@ -2054,13 +2060,19 @@ static void rtl_hw_start_8169(struct net_device *dev) RTL_W16(IntrMask, tp->intr_event); } -static void rtl_tx_performance_tweak(struct pci_dev *pdev, u8 force) +static void rtl_tx_performance_tweak(struct pci_dev *pdev, u16 force) { - u8 ctl; + struct net_device *dev = pci_get_drvdata(pdev); + struct rtl8169_private *tp = netdev_priv(dev); + int cap = tp->pcie_cap; + + if (cap) { + u16 ctl; - pci_read_config_byte(pdev, 0x69, &ctl); - ctl = (ctl & ~0x70) | force; - pci_write_config_byte(pdev, 0x69, ctl); + pci_read_config_word(pdev, cap + PCI_EXP_DEVCTL, &ctl); + ctl = (ctl & ~PCI_EXP_DEVCTL_READRQ) | force; + pci_write_config_word(pdev, cap + PCI_EXP_DEVCTL, ctl); + } } static void rtl_hw_start_8168(struct net_device *dev) @@ -2081,7 +2093,7 @@ static void rtl_hw_start_8168(struct net_device *dev) RTL_W16(CPlusCmd, tp->cp_cmd); - rtl_tx_performance_tweak(pdev, 0x50); + rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT); RTL_W16(IntrMitigate, 0x5151); @@ -2116,8 +2128,12 @@ static void rtl_hw_start_8101(struct net_device *dev) if ((tp->mac_version == RTL_GIGA_MAC_VER_13) || (tp->mac_version == RTL_GIGA_MAC_VER_16)) { - pci_write_config_word(pdev, 0x68, 0x00); - pci_write_config_word(pdev, 0x69, 0x08); + int cap = tp->pcie_cap; + + if (cap) { + pci_write_config_word(pdev, cap + PCI_EXP_DEVCTL, + PCI_EXP_DEVCTL_NOSNOOP_EN); + } } RTL_W8(Cfg9346, Cfg9346_Unlock); -- cgit v0.10.2 From f162a5d1b326d54b0be7e3100f69763d8a707721 Mon Sep 17 00:00:00 2001 From: Francois Romieu Date: Sun, 1 Jun 2008 22:37:49 +0200 Subject: r8169: add 8168/8101 registers description Signed-off-by: Francois Romieu Cc: Edward Hsu diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c index ae149a9..600540e 100644 --- a/drivers/net/r8169.c +++ b/drivers/net/r8169.c @@ -197,9 +197,6 @@ enum rtl_registers { Config5 = 0x56, MultiIntr = 0x5c, PHYAR = 0x60, - TBICSR = 0x64, - TBI_ANAR = 0x68, - TBI_LPAR = 0x6a, PHYstatus = 0x6c, RxMaxSize = 0xda, CPlusCmd = 0xe0, @@ -213,6 +210,32 @@ enum rtl_registers { FuncForceEvent = 0xfc, }; +enum rtl8110_registers { + TBICSR = 0x64, + TBI_ANAR = 0x68, + TBI_LPAR = 0x6a, +}; + +enum rtl8168_8101_registers { + CSIDR = 0x64, + CSIAR = 0x68, +#define CSIAR_FLAG 0x80000000 +#define CSIAR_WRITE_CMD 0x80000000 +#define CSIAR_BYTE_ENABLE 0x0f +#define CSIAR_BYTE_ENABLE_SHIFT 12 +#define CSIAR_ADDR_MASK 0x0fff + + EPHYAR = 0x80, +#define EPHYAR_FLAG 0x80000000 +#define EPHYAR_WRITE_CMD 0x80000000 +#define EPHYAR_REG_MASK 0x1f +#define EPHYAR_REG_SHIFT 16 +#define EPHYAR_DATA_MASK 0xffff + DBG_REG = 0xd1, +#define FIX_NAK_1 (1 << 4) +#define FIX_NAK_2 (1 << 3) +}; + enum rtl_register_content { /* InterruptStatusBits */ SYSErr = 0x8000, @@ -266,7 +289,13 @@ enum rtl_register_content { TxDMAShift = 8, /* DMA burst value (0-7) is shift this many bits */ /* Config1 register p.24 */ + LEDS1 = (1 << 7), + LEDS0 = (1 << 6), MSIEnable = (1 << 5), /* Enable Message Signaled Interrupt */ + Speed_down = (1 << 4), + MEMMAP = (1 << 3), + IOMAP = (1 << 2), + VPD = (1 << 1), PMEnable = (1 << 0), /* Power Management Enable */ /* Config2 register p. 25 */ @@ -276,6 +305,7 @@ enum rtl_register_content { /* Config3 register p.25 */ MagicPacket = (1 << 5), /* Wake up when receives a Magic Packet */ LinkUp = (1 << 4), /* Wake up when the cable connection is re-established */ + Beacon_en = (1 << 0), /* 8168 only. Reserved in the 8168b */ /* Config5 register p.27 */ BWF = (1 << 6), /* Accept Broadcast wakeup frame */ @@ -293,7 +323,16 @@ enum rtl_register_content { TBINwComplete = 0x01000000, /* CPlusCmd p.31 */ - PktCntrDisable = (1 << 7), // 8168 + EnableBist = (1 << 15), // 8168 8101 + Mac_dbgo_oe = (1 << 14), // 8168 8101 + Normal_mode = (1 << 13), // unused + Force_half_dup = (1 << 12), // 8168 8101 + Force_rxflow_en = (1 << 11), // 8168 8101 + Force_txflow_en = (1 << 10), // 8168 8101 + Cxpl_dbg_sel = (1 << 9), // 8168 8101 + ASF = (1 << 8), // 8168 8101 + PktCntrDisable = (1 << 7), // 8168 8101 + Mac_dbgo_sel = 0x001c, // 8168 RxVlan = (1 << 6), RxChkSum = (1 << 5), PCIDAC = (1 << 4), -- cgit v0.10.2 From dacf815434a4d5f5b45687873df46927c64cfb19 Mon Sep 17 00:00:00 2001 From: Francois Romieu Date: Sat, 2 Aug 2008 20:44:13 +0200 Subject: r8169: add hw start helpers for the 8168 and the 8101 This commit triggers three 'defined but not used' warnings but I prefer avoiding to tie these helpers to a specific change in the hw start sequences of the 8168 or of the 8101. Signed-off-by: Francois Romieu Cc: Edward Hsu diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c index 600540e..f4ad9ff 100644 --- a/drivers/net/r8169.c +++ b/drivers/net/r8169.c @@ -526,6 +526,11 @@ static int mdio_read(void __iomem *ioaddr, int reg_addr) return value; } +static void mdio_patch(void __iomem *ioaddr, int reg_addr, int value) +{ + mdio_write(ioaddr, reg_addr, mdio_read(ioaddr, reg_addr) | value); +} + static void rtl_mdio_write(struct net_device *dev, int phy_id, int location, int val) { @@ -543,6 +548,72 @@ static int rtl_mdio_read(struct net_device *dev, int phy_id, int location) return mdio_read(ioaddr, location); } +static void rtl_ephy_write(void __iomem *ioaddr, int reg_addr, int value) +{ + unsigned int i; + + RTL_W32(EPHYAR, EPHYAR_WRITE_CMD | (value & EPHYAR_DATA_MASK) | + (reg_addr & EPHYAR_REG_MASK) << EPHYAR_REG_SHIFT); + + for (i = 0; i < 100; i++) { + if (!(RTL_R32(EPHYAR) & EPHYAR_FLAG)) + break; + udelay(10); + } +} + +static u16 rtl_ephy_read(void __iomem *ioaddr, int reg_addr) +{ + u16 value = 0xffff; + unsigned int i; + + RTL_W32(EPHYAR, (reg_addr & EPHYAR_REG_MASK) << EPHYAR_REG_SHIFT); + + for (i = 0; i < 100; i++) { + if (RTL_R32(EPHYAR) & EPHYAR_FLAG) { + value = RTL_R32(EPHYAR) & EPHYAR_DATA_MASK; + break; + } + udelay(10); + } + + return value; +} + +static void rtl_csi_write(void __iomem *ioaddr, int addr, int value) +{ + unsigned int i; + + RTL_W32(CSIDR, value); + RTL_W32(CSIAR, CSIAR_WRITE_CMD | (addr & CSIAR_ADDR_MASK) | + CSIAR_BYTE_ENABLE << CSIAR_BYTE_ENABLE_SHIFT); + + for (i = 0; i < 100; i++) { + if (!(RTL_R32(CSIAR) & CSIAR_FLAG)) + break; + udelay(10); + } +} + +static u32 rtl_csi_read(void __iomem *ioaddr, int addr) +{ + u32 value = ~0x00; + unsigned int i; + + RTL_W32(CSIAR, (addr & CSIAR_ADDR_MASK) | + CSIAR_BYTE_ENABLE << CSIAR_BYTE_ENABLE_SHIFT); + + for (i = 0; i < 100; i++) { + if (RTL_R32(CSIAR) & CSIAR_FLAG) { + value = RTL_R32(CSIDR); + break; + } + udelay(10); + } + + return value; +} + static void rtl8169_irq_mask_and_ack(void __iomem *ioaddr) { RTL_W16(IntrMask, 0x0000); @@ -2114,6 +2185,31 @@ static void rtl_tx_performance_tweak(struct pci_dev *pdev, u16 force) } } +static void rtl_csi_access_enable(void __iomem *ioaddr) +{ + u32 csi; + + csi = rtl_csi_read(ioaddr, 0x070c) & 0x00ffffff; + rtl_csi_write(ioaddr, 0x070c, csi | 0x27000000); +} + +struct ephy_info { + unsigned int offset; + u16 mask; + u16 bits; +}; + +static void rtl_ephy_init(void __iomem *ioaddr, struct ephy_info *e, int len) +{ + u16 w; + + while (len-- > 0) { + w = (rtl_ephy_read(ioaddr, e->offset) & ~e->mask) | e->bits; + rtl_ephy_write(ioaddr, e->offset, w); + e++; + } +} + static void rtl_hw_start_8168(struct net_device *dev) { struct rtl8169_private *tp = netdev_priv(dev); -- cgit v0.10.2 From 2857ffb7b8913ef713533ac5783abd70a20529e4 Mon Sep 17 00:00:00 2001 From: Francois Romieu Date: Sat, 2 Aug 2008 21:08:49 +0200 Subject: r8169: additional 8101 and 8102 support Signed-off-by: Ivan Vecera Signed-off-by: Francois Romieu Cc: Edward Hsu diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c index f4ad9ff..bbaf8a5 100644 --- a/drivers/net/r8169.c +++ b/drivers/net/r8169.c @@ -96,6 +96,10 @@ enum mac_version { RTL_GIGA_MAC_VER_04 = 0x04, // 8169SB RTL_GIGA_MAC_VER_05 = 0x05, // 8110SCd RTL_GIGA_MAC_VER_06 = 0x06, // 8110SCe + RTL_GIGA_MAC_VER_07 = 0x07, // 8102e + RTL_GIGA_MAC_VER_08 = 0x08, // 8102e + RTL_GIGA_MAC_VER_09 = 0x09, // 8102e + RTL_GIGA_MAC_VER_10 = 0x0a, // 8101e RTL_GIGA_MAC_VER_11 = 0x0b, // 8168Bb RTL_GIGA_MAC_VER_12 = 0x0c, // 8168Be RTL_GIGA_MAC_VER_13 = 0x0d, // 8101Eb @@ -122,6 +126,10 @@ static const struct { _R("RTL8169sb/8110sb", RTL_GIGA_MAC_VER_04, 0xff7e1880), // 8169SB _R("RTL8169sc/8110sc", RTL_GIGA_MAC_VER_05, 0xff7e1880), // 8110SCd _R("RTL8169sc/8110sc", RTL_GIGA_MAC_VER_06, 0xff7e1880), // 8110SCe + _R("RTL8102e", RTL_GIGA_MAC_VER_07, 0xff7e1880), // PCI-E + _R("RTL8102e", RTL_GIGA_MAC_VER_08, 0xff7e1880), // PCI-E + _R("RTL8102e", RTL_GIGA_MAC_VER_09, 0xff7e1880), // PCI-E + _R("RTL8101e", RTL_GIGA_MAC_VER_10, 0xff7e1880), // PCI-E _R("RTL8168b/8111b", RTL_GIGA_MAC_VER_11, 0xff7e1880), // PCI-E _R("RTL8168b/8111b", RTL_GIGA_MAC_VER_12, 0xff7e1880), // PCI-E _R("RTL8101e", RTL_GIGA_MAC_VER_13, 0xff7e1880), // PCI-E 8139 @@ -837,8 +845,12 @@ static int rtl8169_set_speed_xmii(struct net_device *dev, } } - /* The 8100e/8101e do Fast Ethernet only. */ - if ((tp->mac_version == RTL_GIGA_MAC_VER_13) || + /* The 8100e/8101e/8102e do Fast Ethernet only. */ + if ((tp->mac_version == RTL_GIGA_MAC_VER_07) || + (tp->mac_version == RTL_GIGA_MAC_VER_08) || + (tp->mac_version == RTL_GIGA_MAC_VER_09) || + (tp->mac_version == RTL_GIGA_MAC_VER_10) || + (tp->mac_version == RTL_GIGA_MAC_VER_13) || (tp->mac_version == RTL_GIGA_MAC_VER_14) || (tp->mac_version == RTL_GIGA_MAC_VER_15) || (tp->mac_version == RTL_GIGA_MAC_VER_16)) { @@ -1212,8 +1224,17 @@ static void rtl8169_get_mac_version(struct rtl8169_private *tp, { 0x7c800000, 0x30000000, RTL_GIGA_MAC_VER_11 }, /* 8101 family. */ + { 0x7cf00000, 0x34a00000, RTL_GIGA_MAC_VER_09 }, + { 0x7cf00000, 0x24a00000, RTL_GIGA_MAC_VER_09 }, + { 0x7cf00000, 0x34900000, RTL_GIGA_MAC_VER_08 }, + { 0x7cf00000, 0x24900000, RTL_GIGA_MAC_VER_08 }, + { 0x7cf00000, 0x34800000, RTL_GIGA_MAC_VER_07 }, + { 0x7cf00000, 0x24800000, RTL_GIGA_MAC_VER_07 }, { 0x7cf00000, 0x34000000, RTL_GIGA_MAC_VER_13 }, + { 0x7cf00000, 0x34300000, RTL_GIGA_MAC_VER_10 }, { 0x7cf00000, 0x34200000, RTL_GIGA_MAC_VER_16 }, + { 0x7c800000, 0x34800000, RTL_GIGA_MAC_VER_09 }, + { 0x7c800000, 0x24800000, RTL_GIGA_MAC_VER_09 }, { 0x7c800000, 0x34000000, RTL_GIGA_MAC_VER_16 }, /* FIXME: where did these entries come from ? -- FR */ { 0xfc800000, 0x38800000, RTL_GIGA_MAC_VER_15 }, @@ -1375,6 +1396,22 @@ static void rtl8168cx_hw_phy_config(void __iomem *ioaddr) rtl_phy_write(ioaddr, phy_reg_init, ARRAY_SIZE(phy_reg_init)); } +static void rtl8102e_hw_phy_config(void __iomem *ioaddr) +{ + struct phy_reg phy_reg_init[] = { + { 0x1f, 0x0003 }, + { 0x08, 0x441d }, + { 0x01, 0x9100 }, + { 0x1f, 0x0000 } + }; + + mdio_write(ioaddr, 0x1f, 0x0000); + mdio_patch(ioaddr, 0x11, 1 << 12); + mdio_patch(ioaddr, 0x19, 1 << 13); + + rtl_phy_write(ioaddr, phy_reg_init, ARRAY_SIZE(phy_reg_init)); +} + static void rtl_hw_phy_config(struct net_device *dev) { struct rtl8169_private *tp = netdev_priv(dev); @@ -1392,6 +1429,11 @@ static void rtl_hw_phy_config(struct net_device *dev) case RTL_GIGA_MAC_VER_04: rtl8169sb_hw_phy_config(ioaddr); break; + case RTL_GIGA_MAC_VER_07: + case RTL_GIGA_MAC_VER_08: + case RTL_GIGA_MAC_VER_09: + rtl8102e_hw_phy_config(ioaddr); + break; case RTL_GIGA_MAC_VER_18: rtl8168cp_hw_phy_config(ioaddr); break; @@ -2255,6 +2297,70 @@ static void rtl_hw_start_8168(struct net_device *dev) RTL_W16(IntrMask, tp->intr_event); } +#define R810X_CPCMD_QUIRK_MASK (\ + EnableBist | \ + Mac_dbgo_oe | \ + Force_half_dup | \ + Force_half_dup | \ + Force_txflow_en | \ + Cxpl_dbg_sel | \ + ASF | \ + PktCntrDisable | \ + PCIDAC | \ + PCIMulRW) + +static void rtl_hw_start_8102e_1(void __iomem *ioaddr, struct pci_dev *pdev) +{ + static struct ephy_info e_info_8102e_1[] = { + { 0x01, 0, 0x6e65 }, + { 0x02, 0, 0x091f }, + { 0x03, 0, 0xc2f9 }, + { 0x06, 0, 0xafb5 }, + { 0x07, 0, 0x0e00 }, + { 0x19, 0, 0xec80 }, + { 0x01, 0, 0x2e65 }, + { 0x01, 0, 0x6e65 } + }; + u8 cfg1; + + rtl_csi_access_enable(ioaddr); + + RTL_W8(DBG_REG, FIX_NAK_1); + + rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT); + + RTL_W8(Config1, + LEDS1 | LEDS0 | Speed_down | MEMMAP | IOMAP | VPD | PMEnable); + RTL_W8(Config3, RTL_R8(Config3) & ~Beacon_en); + + cfg1 = RTL_R8(Config1); + if ((cfg1 & LEDS0) && (cfg1 & LEDS1)) + RTL_W8(Config1, cfg1 & ~LEDS0); + + RTL_W16(CPlusCmd, RTL_R16(CPlusCmd) & ~R810X_CPCMD_QUIRK_MASK); + + rtl_ephy_init(ioaddr, e_info_8102e_1, ARRAY_SIZE(e_info_8102e_1)); +} + +static void rtl_hw_start_8102e_2(void __iomem *ioaddr, struct pci_dev *pdev) +{ + rtl_csi_access_enable(ioaddr); + + rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT); + + RTL_W8(Config1, MEMMAP | IOMAP | VPD | PMEnable); + RTL_W8(Config3, RTL_R8(Config3) & ~Beacon_en); + + RTL_W16(CPlusCmd, RTL_R16(CPlusCmd) & ~R810X_CPCMD_QUIRK_MASK); +} + +static void rtl_hw_start_8102e_3(void __iomem *ioaddr, struct pci_dev *pdev) +{ + rtl_hw_start_8102e_2(ioaddr, pdev); + + rtl_ephy_write(ioaddr, 0x03, 0xc2f9); +} + static void rtl_hw_start_8101(struct net_device *dev) { struct rtl8169_private *tp = netdev_priv(dev); @@ -2271,6 +2377,20 @@ static void rtl_hw_start_8101(struct net_device *dev) } } + switch (tp->mac_version) { + case RTL_GIGA_MAC_VER_07: + rtl_hw_start_8102e_1(ioaddr, pdev); + break; + + case RTL_GIGA_MAC_VER_08: + rtl_hw_start_8102e_3(ioaddr, pdev); + break; + + case RTL_GIGA_MAC_VER_09: + rtl_hw_start_8102e_2(ioaddr, pdev); + break; + } + RTL_W8(Cfg9346, Cfg9346_Unlock); RTL_W8(EarlyTxThres, EarlyTxThld); -- cgit v0.10.2 From cd95851785bcfe95fdf73689e8ecb5a1c5959231 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 17 Aug 2008 07:37:15 -0700 Subject: rcu: fix classic RCU locking cleanup lockdep problem On Fri, Aug 15, 2008 at 04:24:30PM +0200, Ingo Molnar wrote: > > Paul, > > one of your two recent RCU patches caused this lockdep splat in -tip > testing: > > -------------------> > Brought up 2 CPUs > Total of 2 processors activated (6850.87 BogoMIPS). > PM: Adding info for No Bus:platform > khelper used greatest stack depth: 3124 bytes left > > ================================= > [ INFO: inconsistent lock state ] > 2.6.27-rc3-tip #1 > --------------------------------- > inconsistent {softirq-on-W} -> {in-softirq-W} usage. > ksoftirqd/0/4 [HC0[0]:SC1[1]:HE1:SE0] takes: > (&rcu_ctrlblk.lock){-+..}, at: [] __rcu_process_callbacks+0x1ac/0x1f0 > {softirq-on-W} state was registered at: > [] __lock_acquire+0x3f4/0x5b0 > [] lock_acquire+0x89/0xc0 > [] _spin_lock+0x3b/0x70 > [] rcu_init_percpu_data+0x29/0x80 > [] rcu_cpu_notify+0xaf/0xd0 > [] notifier_call_chain+0x2d/0x60 > [] __raw_notifier_call_chain+0x1e/0x30 > [] _cpu_up+0x79/0x110 > [] cpu_up+0x4d/0x70 > [] kernel_init+0xb1/0x200 > [] kernel_thread_helper+0x7/0x10 > [] 0xffffffff > irq event stamp: 14 > hardirqs last enabled at (14): [] trace_hardirqs_on+0xb/0x10 > hardirqs last disabled at (13): [] trace_hardirqs_off+0xb/0x10 > softirqs last enabled at (0): [] copy_process+0x276/0x1190 > softirqs last disabled at (11): [] call_on_stack+0x1a/0x30 > > other info that might help us debug this: > no locks held by ksoftirqd/0/4. > > stack backtrace: > Pid: 4, comm: ksoftirqd/0 Not tainted 2.6.27-rc3-tip #1 > [] print_usage_bug+0x16c/0x1b0 > [] mark_lock+0xa75/0xb10 > [] ? sched_clock+0x15/0x30 > [] __lock_acquire+0x3ad/0x5b0 > [] lock_acquire+0x89/0xc0 > [] ? __rcu_process_callbacks+0x1ac/0x1f0 > [] _spin_lock+0x3b/0x70 > [] ? __rcu_process_callbacks+0x1ac/0x1f0 > [] __rcu_process_callbacks+0x1ac/0x1f0 > [] rcu_process_callbacks+0x26/0x50 > [] __do_softirq+0x95/0x120 > [] ? __do_softirq+0x0/0x120 > [] call_on_stack+0x1a/0x30 > [] ? ksoftirqd+0x96/0x110 > [] ? ksoftirqd+0x0/0x110 > [] ? kthread+0x47/0x80 > [] ? kthread+0x0/0x80 > [] ? kernel_thread_helper+0x7/0x10 > ======================= > calling init_cpufreq_transition_notifier_list+0x0/0x20 > initcall init_cpufreq_transition_notifier_list+0x0/0x20 returned 0 after 0 msecs > calling net_ns_init+0x0/0x190 > net_namespace: 676 bytes > initcall net_ns_init+0x0/0x190 returned 0 after 0 msecs > calling cpufreq_tsc+0x0/0x20 > initcall cpufreq_tsc+0x0/0x20 returned 0 after 0 msecs > calling reboot_init+0x0/0x20 > initcall reboot_init+0x0/0x20 returned 0 after 0 msecs > calling print_banner+0x0/0x10 > Booting paravirtualized kernel on bare hardware > > <----------------------- > > my guess is on: > > commit 1f7b94cd3d564901f9e04a8bc5832ae7bfd690a0 > Author: Paul E. McKenney > Date: Tue Aug 5 09:21:44 2008 -0700 > > rcu: classic RCU locking and memory-barrier cleanups > > Ingo Fixes a problem detected by lockdep in which rcu->lock was acquired both in irq context and in process context, but without disabling from process context. Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c index 5de1266..fb1f1cc4 100644 --- a/kernel/rcuclassic.c +++ b/kernel/rcuclassic.c @@ -700,7 +700,9 @@ void rcu_check_callbacks(int cpu, int user) static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp, struct rcu_data *rdp) { - spin_lock(&rcp->lock); + long flags; + + spin_lock_irqsave(&rcp->lock, flags); memset(rdp, 0, sizeof(*rdp)); rdp->nxttail[0] = rdp->nxttail[1] = rdp->nxttail[2] = &rdp->nxtlist; rdp->donetail = &rdp->donelist; @@ -708,7 +710,7 @@ static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp, rdp->qs_pending = 0; rdp->cpu = cpu; rdp->blimit = blimit; - spin_unlock(&rcp->lock); + spin_unlock_irqrestore(&rcp->lock, flags); } static void __cpuinit rcu_online_cpu(int cpu) -- cgit v0.10.2 From 7b22ff5344fda666e0938e5261ea7b9a3dfce497 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 18 Aug 2008 00:36:18 +0900 Subject: x86 gart: allocate size-aligned address for alloc_coherent, v2 This patch changes GART IOMMU to return a size aligned address wrt dma_alloc_coherent, as DMA-mapping.txt defines: The cpu return address and the DMA bus master address are both guaranteed to be aligned to the smallest PAGE_SIZE order which is greater than or equal to the requested size. This invariant exists (for example) to guarantee that if you allocate a chunk which is smaller than or equal to 64 kilobytes, the extent of the buffer you receive will not cross a 64K boundary. Signed-off-by: FUJITA Tomonori Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index cdab678..4d8efb0 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -82,7 +82,8 @@ AGPEXTERN __u32 *agp_gatt_table; static unsigned long next_bit; /* protected by iommu_bitmap_lock */ static int need_flush; /* global flush state. set for each gart wrap */ -static unsigned long alloc_iommu(struct device *dev, int size) +static unsigned long alloc_iommu(struct device *dev, int size, + unsigned long align_mask) { unsigned long offset, flags; unsigned long boundary_size; @@ -95,11 +96,12 @@ static unsigned long alloc_iommu(struct device *dev, int size) spin_lock_irqsave(&iommu_bitmap_lock, flags); offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, next_bit, - size, base_index, boundary_size, 0); + size, base_index, boundary_size, align_mask); if (offset == -1) { need_flush = 1; offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, 0, - size, base_index, boundary_size, 0); + size, base_index, boundary_size, + align_mask); } if (offset != -1) { next_bit = offset+size; @@ -236,10 +238,10 @@ nonforced_iommu(struct device *dev, unsigned long addr, size_t size) * Caller needs to check if the iommu is needed and flush. */ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, - size_t size, int dir) + size_t size, int dir, unsigned long align_mask) { unsigned long npages = iommu_num_pages(phys_mem, size); - unsigned long iommu_page = alloc_iommu(dev, npages); + unsigned long iommu_page = alloc_iommu(dev, npages, align_mask); int i; if (iommu_page == -1) { @@ -262,7 +264,11 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, static dma_addr_t gart_map_simple(struct device *dev, phys_addr_t paddr, size_t size, int dir) { - dma_addr_t map = dma_map_area(dev, paddr, size, dir); + dma_addr_t map; + unsigned long align_mask; + + align_mask = (1UL << get_order(size)) - 1; + map = dma_map_area(dev, paddr, size, dir, align_mask); flush_gart(); @@ -281,7 +287,8 @@ gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir) if (!need_iommu(dev, paddr, size)) return paddr; - bus = gart_map_simple(dev, paddr, size, dir); + bus = dma_map_area(dev, paddr, size, dir, 0); + flush_gart(); return bus; } @@ -340,7 +347,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, unsigned long addr = sg_phys(s); if (nonforced_iommu(dev, addr, s->length)) { - addr = dma_map_area(dev, addr, s->length, dir); + addr = dma_map_area(dev, addr, s->length, dir, 0); if (addr == bad_dma_address) { if (i > 0) gart_unmap_sg(dev, sg, i, dir); @@ -362,7 +369,7 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start, int nelems, struct scatterlist *sout, unsigned long pages) { - unsigned long iommu_start = alloc_iommu(dev, pages); + unsigned long iommu_start = alloc_iommu(dev, pages, 0); unsigned long iommu_page = iommu_start; struct scatterlist *s; int i; -- cgit v0.10.2 From d5e629a6f88137fb77c4cc857be5ea7c3f27110d Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 17 Aug 2008 21:12:27 -0700 Subject: x86: apic - unify lapic_resume - fix Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 5e07381..ad532dc 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1446,6 +1446,7 @@ static int lapic_resume(struct sys_device *dev) enable_x2apic(); else #endif + { /* * Make sure the APICBASE points to the right address * @@ -1456,6 +1457,7 @@ static int lapic_resume(struct sys_device *dev) l &= ~MSR_IA32_APICBASE_BASE; l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; wrmsr(MSR_IA32_APICBASE, l, h); + } apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); apic_write(APIC_ID, apic_pm_state.apic_id); -- cgit v0.10.2 From ded00a56e99555c3f4000ef3eebfd5fe0d574565 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 17 Aug 2008 12:50:36 -0700 Subject: rcu: remove redundant ACCESS_ONCE definition from rcupreempt.c Remove the redundant definition of ACCESS_ONCE() from rcupreempt.c in favor of the one in compiler.h. Also merge the comment header from rcupreempt.c's definition into that in compiler.h. Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar diff --git a/include/linux/compiler.h b/include/linux/compiler.h index c8bd2da..8322141 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -190,7 +190,9 @@ extern void __chk_io_ptr(const volatile void __iomem *); * ACCESS_ONCE() in different C statements. * * This macro does absolutely -nothing- to prevent the CPU from reordering, - * merging, or refetching absolutely anything at any time. + * merging, or refetching absolutely anything at any time. Its main intended + * use is to mediate communication between process-level code and irq/NMI + * handlers, all running on the same CPU. */ #define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c index 2782793..ca4bbbe 100644 --- a/kernel/rcupreempt.c +++ b/kernel/rcupreempt.c @@ -59,14 +59,6 @@ #include /* - * Macro that prevents the compiler from reordering accesses, but does - * absolutely -nothing- to prevent CPUs from reordering. This is used - * only to mediate communication between mainline code and hardware - * interrupt and NMI handlers. - */ -#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) - -/* * PREEMPT_RCU data structures. */ -- cgit v0.10.2 From 8bfcb3960fde049b863266dab8c3617bb5a541aa Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Mon, 18 Aug 2008 12:33:20 +0200 Subject: x86: make movsl_mask definition non-CPU specific movsl_mask is currently defined in arch/x86/kernel/cpu/intel.c, which contains code specific to Intel CPUs. However, movsl_mask is used in the non-CPU specific code in arch/x86/lib/usercopy_32.c, which breaks the compilation when support for Intel CPUs is compiled out. This patch solves this problem by moving movsl_mask's definition close to its users in arch/x86/lib/usercopy_32.c. Signed-off-by: Thomas Petazzoni Cc: michael@free-electrons.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index b75f256..5c8959b 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -23,13 +23,6 @@ #include #endif -#ifdef CONFIG_X86_INTEL_USERCOPY -/* - * Alignment at which movsl is preferred for bulk memory copies. - */ -struct movsl_mask movsl_mask __read_mostly; -#endif - static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) { /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */ diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index 24e6094..9e68075 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -14,6 +14,13 @@ #include #include +#ifdef CONFIG_X86_INTEL_USERCOPY +/* + * Alignment at which movsl is preferred for bulk memory copies. + */ +struct movsl_mask movsl_mask __read_mostly; +#endif + static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned long n) { #ifdef CONFIG_X86_INTEL_USERCOPY -- cgit v0.10.2 From 774400a3ba23b63f4de39e67ce6c4e48935809dc Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Mon, 18 Aug 2008 12:33:21 +0200 Subject: x86: move cmpxchg fallbacks to a generic place arch/x86/kernel/cpu/intel.c defines a few fallback functions (cmpxchg_*()) that are used when the CPU doesn't support cmpxchg and/or cmpxchg64 natively. However, while defined in an Intel-specific file, these functions are also used for CPUs from other vendors when they don't support cmpxchg and/or cmpxchg64. This breaks the compilation when support for Intel CPUs is disabled. This patch moves these functions to a new arch/x86/kernel/cpu/cmpxchg.c file, unconditionally compiled when X86_32 is enabled. Signed-off-by: Thomas Petazzoni Cc: michael@free-electrons.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index ee76eaa..25b4c06 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -5,7 +5,7 @@ obj-y := intel_cacheinfo.o addon_cpuid_features.o obj-y += proc.o feature_names.o -obj-$(CONFIG_X86_32) += common.o bugs.o +obj-$(CONFIG_X86_32) += common.o bugs.o cmpxchg.o obj-$(CONFIG_X86_64) += common_64.o bugs_64.o obj-$(CONFIG_X86_32) += amd.o obj-$(CONFIG_X86_64) += amd_64.o diff --git a/arch/x86/kernel/cpu/cmpxchg.c b/arch/x86/kernel/cpu/cmpxchg.c new file mode 100644 index 0000000..2056ccf --- /dev/null +++ b/arch/x86/kernel/cpu/cmpxchg.c @@ -0,0 +1,72 @@ +/* + * cmpxchg*() fallbacks for CPU not supporting these instructions + */ + +#include +#include +#include + +#ifndef CONFIG_X86_CMPXCHG +unsigned long cmpxchg_386_u8(volatile void *ptr, u8 old, u8 new) +{ + u8 prev; + unsigned long flags; + + /* Poor man's cmpxchg for 386. Unsuitable for SMP */ + local_irq_save(flags); + prev = *(u8 *)ptr; + if (prev == old) + *(u8 *)ptr = new; + local_irq_restore(flags); + return prev; +} +EXPORT_SYMBOL(cmpxchg_386_u8); + +unsigned long cmpxchg_386_u16(volatile void *ptr, u16 old, u16 new) +{ + u16 prev; + unsigned long flags; + + /* Poor man's cmpxchg for 386. Unsuitable for SMP */ + local_irq_save(flags); + prev = *(u16 *)ptr; + if (prev == old) + *(u16 *)ptr = new; + local_irq_restore(flags); + return prev; +} +EXPORT_SYMBOL(cmpxchg_386_u16); + +unsigned long cmpxchg_386_u32(volatile void *ptr, u32 old, u32 new) +{ + u32 prev; + unsigned long flags; + + /* Poor man's cmpxchg for 386. Unsuitable for SMP */ + local_irq_save(flags); + prev = *(u32 *)ptr; + if (prev == old) + *(u32 *)ptr = new; + local_irq_restore(flags); + return prev; +} +EXPORT_SYMBOL(cmpxchg_386_u32); +#endif + +#ifndef CONFIG_X86_CMPXCHG64 +unsigned long long cmpxchg_486_u64(volatile void *ptr, u64 old, u64 new) +{ + u64 prev; + unsigned long flags; + + /* Poor man's cmpxchg8b for 386 and 486. Unsuitable for SMP */ + local_irq_save(flags); + prev = *(u64 *)ptr; + if (prev == old) + *(u64 *)ptr = new; + local_irq_restore(flags); + return prev; +} +EXPORT_SYMBOL(cmpxchg_486_u64); +#endif + diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 5c8959b..77618c7 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -307,69 +307,5 @@ static struct cpu_dev intel_cpu_dev __cpuinitdata = { cpu_vendor_dev_register(X86_VENDOR_INTEL, &intel_cpu_dev); -#ifndef CONFIG_X86_CMPXCHG -unsigned long cmpxchg_386_u8(volatile void *ptr, u8 old, u8 new) -{ - u8 prev; - unsigned long flags; - - /* Poor man's cmpxchg for 386. Unsuitable for SMP */ - local_irq_save(flags); - prev = *(u8 *)ptr; - if (prev == old) - *(u8 *)ptr = new; - local_irq_restore(flags); - return prev; -} -EXPORT_SYMBOL(cmpxchg_386_u8); - -unsigned long cmpxchg_386_u16(volatile void *ptr, u16 old, u16 new) -{ - u16 prev; - unsigned long flags; - - /* Poor man's cmpxchg for 386. Unsuitable for SMP */ - local_irq_save(flags); - prev = *(u16 *)ptr; - if (prev == old) - *(u16 *)ptr = new; - local_irq_restore(flags); - return prev; -} -EXPORT_SYMBOL(cmpxchg_386_u16); - -unsigned long cmpxchg_386_u32(volatile void *ptr, u32 old, u32 new) -{ - u32 prev; - unsigned long flags; - - /* Poor man's cmpxchg for 386. Unsuitable for SMP */ - local_irq_save(flags); - prev = *(u32 *)ptr; - if (prev == old) - *(u32 *)ptr = new; - local_irq_restore(flags); - return prev; -} -EXPORT_SYMBOL(cmpxchg_386_u32); -#endif - -#ifndef CONFIG_X86_CMPXCHG64 -unsigned long long cmpxchg_486_u64(volatile void *ptr, u64 old, u64 new) -{ - u64 prev; - unsigned long flags; - - /* Poor man's cmpxchg8b for 386 and 486. Unsuitable for SMP */ - local_irq_save(flags); - prev = *(u64 *)ptr; - if (prev == old) - *(u64 *)ptr = new; - local_irq_restore(flags); - return prev; -} -EXPORT_SYMBOL(cmpxchg_486_u64); -#endif - /* arch_initcall(intel_cpu_init); */ -- cgit v0.10.2 From 8d02c2110b3fb8e2700b31596a582a2989fd72ba Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Tue, 5 Aug 2008 11:45:19 +0200 Subject: x86: configuration options to compile out x86 CPU support code This patch adds some configuration options that allow to compile out CPU vendor-specific code in x86 kernels (in arch/x86/kernel/cpu). The new configuration options are only visible when CONFIG_EMBEDDED is selected, as they are mostly interesting for space savings reasons. An example of size saving, on x86 with only Intel CPU support: text data bss dec hex filename 1125479 118760 212992 1457231 163c4f vmlinux.old 1121355 116536 212992 1450883 162383 vmlinux -4124 -2224 0 -6348 -18CC +/- However, I'm not exactly sure that the Kconfig wording is correct with regard to !64BIT / 64BIT. [ mingo@elte.hu: convert macro to inline ] Signed-off-by: Thomas Petazzoni Signed-off-by: Ingo Molnar diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 2c518fb..6156ac2 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -415,3 +415,73 @@ config X86_MINIMUM_CPU_FAMILY config X86_DEBUGCTLMSR def_bool y depends on !(MK6 || MWINCHIPC6 || MWINCHIP2 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486 || M386) + +menuconfig PROCESSOR_SELECT + default y + bool "Supported processor vendors" if EMBEDDED + help + This lets you choose what x86 vendor support code your kernel + will include. + +config CPU_SUP_INTEL_32 + default y + bool "Support Intel processors" if PROCESSOR_SELECT + depends on !64BIT + help + This enables extended support for Intel processors + +config CPU_SUP_INTEL_64 + default y + bool "Support Intel processors" if PROCESSOR_SELECT + depends on 64BIT + help + This enables extended support for Intel processors + +config CPU_SUP_CYRIX_32 + default y + bool "Support Cyrix processors" if PROCESSOR_SELECT + depends on !64BIT + help + This enables extended support for Cyrix processors + +config CPU_SUP_AMD_32 + default y + bool "Support AMD processors" if PROCESSOR_SELECT + depends on !64BIT + help + This enables extended support for AMD processors + +config CPU_SUP_AMD_64 + default y + bool "Support AMD processors" if PROCESSOR_SELECT + depends on 64BIT + help + This enables extended support for AMD processors + +config CPU_SUP_CENTAUR_32 + default y + bool "Support Centaur processors" if PROCESSOR_SELECT + depends on !64BIT + help + This enables extended support for Centaur processors + +config CPU_SUP_CENTAUR_64 + default y + bool "Support Centaur processors" if PROCESSOR_SELECT + depends on 64BIT + help + This enables extended support for Centaur processors + +config CPU_SUP_TRANSMETA_32 + default y + bool "Support Transmeta processors" if PROCESSOR_SELECT + depends on !64BIT + help + This enables extended support for Transmeta processors + +config CPU_SUP_UMC_32 + default y + bool "Support UMC processors" if PROCESSOR_SELECT + depends on !64BIT + help + This enables extended support for UMC processors diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 25b4c06..a0fc6c1 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -7,15 +7,16 @@ obj-y += proc.o feature_names.o obj-$(CONFIG_X86_32) += common.o bugs.o cmpxchg.o obj-$(CONFIG_X86_64) += common_64.o bugs_64.o -obj-$(CONFIG_X86_32) += amd.o -obj-$(CONFIG_X86_64) += amd_64.o -obj-$(CONFIG_X86_32) += cyrix.o -obj-$(CONFIG_X86_32) += centaur.o -obj-$(CONFIG_X86_64) += centaur_64.o -obj-$(CONFIG_X86_32) += transmeta.o -obj-$(CONFIG_X86_32) += intel.o -obj-$(CONFIG_X86_64) += intel_64.o -obj-$(CONFIG_X86_32) += umc.o + +obj-$(CONFIG_CPU_SUP_AMD_32) += amd.o +obj-$(CONFIG_CPU_SUP_AMD_64) += amd_64.o +obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o +obj-$(CONFIG_CPU_SUP_CENTAUR_32) += centaur.o +obj-$(CONFIG_CPU_SUP_CENTAUR_64) += centaur_64.o +obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o +obj-$(CONFIG_CPU_SUP_INTEL_32) += intel.o +obj-$(CONFIG_CPU_SUP_INTEL_64) += intel_64.o +obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o obj-$(CONFIG_X86_MCE) += mcheck/ obj-$(CONFIG_MTRR) += mtrr/ diff --git a/include/asm-x86/bugs.h b/include/asm-x86/bugs.h index 021cbdd..00e4a0c 100644 --- a/include/asm-x86/bugs.h +++ b/include/asm-x86/bugs.h @@ -2,6 +2,11 @@ #define _ASM_X86_BUGS_H extern void check_bugs(void); + +#ifdef CONFIG_CPU_SUP_INTEL_32 int ppro_with_ram_bug(void); +#else +static inline int ppro_with_ram_bug(void) { return 0; } +#endif #endif /* _ASM_X86_BUGS_H */ -- cgit v0.10.2 From b6c8051311e1a14d229df05ea39d0a1b2ce90cdd Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:45:49 +0400 Subject: x86: apic - rearrange maxcpu definition Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 41134d4..8d1febf 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -114,6 +114,8 @@ static DEFINE_PER_CPU(struct clock_event_device, lapic_events); static int enabled_via_apicbase; static unsigned long apic_phys; +unsigned int __cpuinitdata maxcpus = NR_CPUS; + /* * Get the LAPIC version @@ -1459,8 +1461,6 @@ void disconnect_bsp_APIC(int virt_wire_setup) } } -unsigned int __cpuinitdata maxcpus = NR_CPUS; - void __cpuinit generic_processor_info(int apicid, int version) { int cpu; diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index ad532dc..46acb9b 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -98,10 +98,10 @@ static struct clock_event_device lapic_clockevent = { static DEFINE_PER_CPU(struct clock_event_device, lapic_events); static unsigned long apic_phys; +unsigned int __cpuinitdata maxcpus = NR_CPUS; unsigned long mp_lapic_addr; -unsigned int __cpuinitdata maxcpus = NR_CPUS; /* * Get the LAPIC version */ -- cgit v0.10.2 From f1ee37891dab6014f6b0ec77b18bc07e2369a55f Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:45:50 +0400 Subject: x86: apic - unify setup_boot_APIC_clock Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 8d1febf..80db3e0 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -612,6 +612,7 @@ void __init setup_boot_APIC_clock(void) * broadcast mechanism is used. On UP systems simply ignore it. */ if (disable_apic_timer) { + printk(KERN_INFO "Disabling APIC timer\n"); /* No broadcast on UP ! */ if (num_possible_cpus() > 1) { lapic_clockevent.mult = 1; -- cgit v0.10.2 From 990b183e58cb513a62492b6218987750e106cbfb Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:45:51 +0400 Subject: x86: apic - unify disable_local_APIC Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 80db3e0..13c4b79 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -796,7 +796,7 @@ void clear_local_APIC(void) */ void disable_local_APIC(void) { - unsigned long value; + unsigned int value; clear_local_APIC(); @@ -808,6 +808,7 @@ void disable_local_APIC(void) value &= ~APIC_SPIV_APIC_ENABLED; apic_write(APIC_SPIV, value); +#ifdef CONFIG_X86_32 /* * When LAPIC was disabled by the BIOS and enabled by the kernel, * restore the disabled state. @@ -819,6 +820,7 @@ void disable_local_APIC(void) l &= ~MSR_IA32_APICBASE_ENABLE; wrmsr(MSR_IA32_APICBASE, l, h); } +#endif } /* diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 46acb9b..4fb903b 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -691,6 +691,20 @@ void disable_local_APIC(void) value = apic_read(APIC_SPIV); value &= ~APIC_SPIV_APIC_ENABLED; apic_write(APIC_SPIV, value); + +#ifdef CONFIG_X86_32 + /* + * When LAPIC was disabled by the BIOS and enabled by the kernel, + * restore the disabled state. + */ + if (enabled_via_apicbase) { + unsigned int l, h; + + rdmsr(MSR_IA32_APICBASE, l, h); + l &= ~MSR_IA32_APICBASE_ENABLE; + wrmsr(MSR_IA32_APICBASE, l, h); + } +#endif } void lapic_shutdown(void) -- cgit v0.10.2 From fe4024dcb0c01e5399394d2807406a2c13fb1eb7 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:45:52 +0400 Subject: x86: apic - unify lapic_shutdown Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 13c4b79..d4efe86 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -838,10 +838,13 @@ void lapic_shutdown(void) local_irq_save(flags); - if (enabled_via_apicbase) - disable_local_APIC(); - else +#ifdef CONFIG_X86_32 + if (!enabled_via_apicbase) clear_local_APIC(); + else +#endif + disable_local_APIC(); + local_irq_restore(flags); } diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 4fb903b..4880654 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -707,6 +707,12 @@ void disable_local_APIC(void) #endif } +/* + * If Linux enabled the LAPIC against the BIOS default disable it down before + * re-entering the BIOS on shutdown. Otherwise the BIOS may get confused and + * not power-off. Additionally clear all LVT entries before disable_local_APIC + * for the case where Linux didn't enable the LAPIC. + */ void lapic_shutdown(void) { unsigned long flags; @@ -716,7 +722,13 @@ void lapic_shutdown(void) local_irq_save(flags); - disable_local_APIC(); +#ifdef CONFIG_X86_32 + if (!enabled_via_apicbase) + clear_local_APIC(); + else +#endif + disable_local_APIC(); + local_irq_restore(flags); } -- cgit v0.10.2 From 36c9d6742897fa414f51c4e9d0f20ab4e6bf942c Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:45:53 +0400 Subject: x86: apic - unify connect_bsp_APIC Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index d4efe86..6d230e9 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1387,6 +1387,7 @@ void smp_error_interrupt(struct pt_regs *regs) */ void __init connect_bsp_APIC(void) { +#ifdef CONFIG_X86_32 if (pic_mode) { /* * Do not trust the local APIC being empty at bootup. @@ -1401,6 +1402,7 @@ void __init connect_bsp_APIC(void) outb(0x70, 0x22); outb(0x01, 0x23); } +#endif enable_apic_mode(); } diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 4880654..5579e21 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1285,10 +1285,26 @@ asmlinkage void smp_error_interrupt(void) } /** - * * connect_bsp_APIC - attach the APIC to the interrupt system - * */ + * connect_bsp_APIC - attach the APIC to the interrupt system + */ void __init connect_bsp_APIC(void) { +#ifdef CONFIG_X86_32 + if (pic_mode) { + /* + * Do not trust the local APIC being empty at bootup. + */ + clear_local_APIC(); + /* + * PIC mode, enable APIC mode in the IMCR, i.e. connect BSP's + * local APIC to INT and NMI lines. + */ + apic_printk(APIC_VERBOSE, "leaving PIC mode, " + "enabling APIC mode.\n"); + outb(0x70, 0x22); + outb(0x01, 0x23); + } +#endif enable_apic_mode(); } -- cgit v0.10.2 From c43da2f5e92fe3bcc256f0c0d6cb858368da5bd9 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:45:54 +0400 Subject: x86: apic - unify lapic_setup_esr We use 32bit code former for 64bit mode since it's much better implementation and easier to merge. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 6d230e9..3c1562a 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -983,6 +983,16 @@ static void __cpuinit lapic_setup_esr(void) { unsigned long oldvalue, value, maxlvt; if (lapic_is_integrated() && !esr_disable) { + if (esr_disable) { + /* + * Something untraceable is creating bad interrupts on + * secondary quads ... for the moment, just leave the + * ESR disabled - we can't do anything useful with the + * errors anyway - mbligh + */ + printk(KERN_INFO "Leaving ESR disabled.\n"); + return; + } /* !82489DX */ maxlvt = lapic_get_maxlvt(); if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ @@ -1003,16 +1013,7 @@ static void __cpuinit lapic_setup_esr(void) "vector: 0x%08lx after: 0x%08lx\n", oldvalue, value); } else { - if (esr_disable) - /* - * Something untraceable is creating bad interrupts on - * secondary quads ... for the moment, just leave the - * ESR disabled - we can't do anything useful with the - * errors anyway - mbligh - */ - printk(KERN_INFO "Leaving ESR disabled.\n"); - else - printk(KERN_INFO "No ESR for 82489DX.\n"); + printk(KERN_INFO "No ESR for 82489DX.\n"); } } diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 5579e21..d74abf7 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -863,6 +863,45 @@ void __init init_bsp_APIC(void) apic_write(APIC_LVT1, value); } +static void __cpuinit lapic_setup_esr(void) +{ + unsigned long oldvalue, value, maxlvt; + if (lapic_is_integrated() && !esr_disable) { + if (esr_disable) { + /* + * Something untraceable is creating bad interrupts on + * secondary quads ... for the moment, just leave the + * ESR disabled - we can't do anything useful with the + * errors anyway - mbligh + */ + printk(KERN_INFO "Leaving ESR disabled.\n"); + return; + } + /* !82489DX */ + maxlvt = lapic_get_maxlvt(); + if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ + apic_write(APIC_ESR, 0); + oldvalue = apic_read(APIC_ESR); + + /* enables sending errors */ + value = ERROR_APIC_VECTOR; + apic_write(APIC_LVTERR, value); + /* + * spec says clear errors after enabling vector. + */ + if (maxlvt > 3) + apic_write(APIC_ESR, 0); + value = apic_read(APIC_ESR); + if (value != oldvalue) + apic_printk(APIC_VERBOSE, "ESR value before enabling " + "vector: 0x%08lx after: 0x%08lx\n", + oldvalue, value); + } else { + printk(KERN_INFO "No ESR for 82489DX.\n"); + } +} + + /** * setup_local_APIC - setup the local APIC */ @@ -968,18 +1007,6 @@ void __cpuinit setup_local_APIC(void) preempt_enable(); } -static void __cpuinit lapic_setup_esr(void) -{ - unsigned maxlvt = lapic_get_maxlvt(); - - apic_write(APIC_LVTERR, ERROR_APIC_VECTOR); - /* - * spec says clear errors after enabling vector. - */ - if (maxlvt > 3) - apic_write(APIC_ESR, 0); -} - void __cpuinit end_local_APIC_setup(void) { lapic_setup_esr(); -- cgit v0.10.2 From c40aaec6868401671a0ca14ed77e9b2da2d1f223 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:45:55 +0400 Subject: x86: apic - unify __setup_APIC_LVTT Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 3c1562a..65419c7 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -248,8 +248,12 @@ int lapic_get_maxlvt(void) * Local APIC timer */ -/* Clock divisor is set to 16 */ +/* Clock divisor */ +#ifdef CONFG_X86_64 +#define APIC_DIVISOR 1 +#else #define APIC_DIVISOR 16 +#endif /* * This function sets up the local APIC timer, with a timeout of @@ -281,8 +285,8 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) */ tmp_value = apic_read(APIC_TDCR); apic_write(APIC_TDCR, - (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | - APIC_TDR_DIV_16); + (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | + APIC_TDR_DIV_16); if (!oneshot) apic_write(APIC_TMICT, clocks / APIC_DIVISOR); diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index d74abf7..fe57db9 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -259,8 +259,12 @@ int lapic_get_maxlvt(void) * Local APIC timer */ -/* Clock divisor is set to 1 */ +/* Clock divisor */ +#ifdef CONFG_X86_64 #define APIC_DIVISOR 1 +#else +#define APIC_DIVISOR 16 +#endif /* * This function sets up the local APIC timer, with a timeout of @@ -291,9 +295,9 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) * Divide PICLK by 16 */ tmp_value = apic_read(APIC_TDCR); - apic_write(APIC_TDCR, (tmp_value - & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) - | APIC_TDR_DIV_16); + apic_write(APIC_TDCR, + (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | + APIC_TDR_DIV_16); if (!oneshot) apic_write(APIC_TMICT, clocks / APIC_DIVISOR); -- cgit v0.10.2 From c177b0bc03e0e11623e2099db42903fb0caf0fd3 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:45:56 +0400 Subject: x86: apic - unify disconnect_bsp_APIC - just #ifdef added Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 65419c7..3095bb7 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1420,6 +1420,7 @@ void __init connect_bsp_APIC(void) */ void disconnect_bsp_APIC(int virt_wire_setup) { +#ifdef CONFIG_X86_32 if (pic_mode) { /* * Put the board back into PIC mode (has an effect only on @@ -1431,47 +1432,48 @@ void disconnect_bsp_APIC(int virt_wire_setup) "entering PIC mode.\n"); outb(0x70, 0x22); outb(0x00, 0x23); - } else { - /* Go back to Virtual Wire compatibility mode */ - unsigned long value; + return; + } +#endif - /* For the spurious interrupt use vector F, and enable it */ - value = apic_read(APIC_SPIV); - value &= ~APIC_VECTOR_MASK; - value |= APIC_SPIV_APIC_ENABLED; - value |= 0xf; - apic_write(APIC_SPIV, value); + /* Go back to Virtual Wire compatibility mode */ + unsigned int value; - if (!virt_wire_setup) { - /* - * For LVT0 make it edge triggered, active high, - * external and enabled - */ - value = apic_read(APIC_LVT0); - value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | - APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | - APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); - value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; - value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); - apic_write(APIC_LVT0, value); - } else { - /* Disable LVT0 */ - apic_write(APIC_LVT0, APIC_LVT_MASKED); - } + /* For the spurious interrupt use vector F, and enable it */ + value = apic_read(APIC_SPIV); + value &= ~APIC_VECTOR_MASK; + value |= APIC_SPIV_APIC_ENABLED; + value |= 0xf; + apic_write(APIC_SPIV, value); + if (!virt_wire_setup) { /* - * For LVT1 make it edge triggered, active high, nmi and - * enabled + * For LVT0 make it edge triggered, active high, + * external and enabled */ - value = apic_read(APIC_LVT1); - value &= ~( - APIC_MODE_MASK | APIC_SEND_PENDING | + value = apic_read(APIC_LVT0); + value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; - value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); - apic_write(APIC_LVT1, value); + value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); + apic_write(APIC_LVT0, value); + } else { + /* Disable LVT0 */ + apic_write(APIC_LVT0, APIC_LVT_MASKED); } + + /* + * For LVT1 make it edge triggered, active high, + * nmi and enabled + */ + value = apic_read(APIC_LVT1); + value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | + APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | + APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); + value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; + value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); + apic_write(APIC_LVT1, value); } void __cpuinit generic_processor_info(int apicid, int version) diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index fe57db9..0d96910 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1348,8 +1348,24 @@ void __init connect_bsp_APIC(void) */ void disconnect_bsp_APIC(int virt_wire_setup) { +#ifdef CONFIG_X86_32 + if (pic_mode) { + /* + * Put the board back into PIC mode (has an effect only on + * certain older boards). Note that APIC interrupts, including + * IPIs, won't work beyond this point! The only exception are + * INIT IPIs. + */ + apic_printk(APIC_VERBOSE, "disabling APIC mode, " + "entering PIC mode.\n"); + outb(0x70, 0x22); + outb(0x00, 0x23); + return; + } +#endif + /* Go back to Virtual Wire compatibility mode */ - unsigned long value; + unsigned int value; /* For the spurious interrupt use vector F, and enable it */ value = apic_read(APIC_SPIV); @@ -1375,7 +1391,10 @@ void disconnect_bsp_APIC(int virt_wire_setup) apic_write(APIC_LVT0, APIC_LVT_MASKED); } - /* For LVT1 make it edge triggered, active high, nmi and enabled */ + /* + * For LVT1 make it edge triggered, active high, + * nmi and enabled + */ value = apic_read(APIC_LVT1); value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | -- cgit v0.10.2 From 1b313f4a6d7bee7b2c034b3f1e203bc360a71cca Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:45:57 +0400 Subject: x86: apic - generic_processor_info - use physid_set instead of phys_cpu and physids_or - set phys_cpu_present_map bit AFTER check for allowed number of processors - add checking for APIC valid version in 64bit mode (mostly not needed but added for merging purpose) - add apic_version definition for 64bit mode which is used now Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 3095bb7..c3a252b 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1480,7 +1480,6 @@ void __cpuinit generic_processor_info(int apicid, int version) { int cpu; cpumask_t tmp_map; - physid_mask_t phys_cpu; /* * Validate version @@ -1493,9 +1492,6 @@ void __cpuinit generic_processor_info(int apicid, int version) } apic_version[apicid] = version; - phys_cpu = apicid_to_cpu_present(apicid); - physids_or(phys_cpu_present_map, phys_cpu_present_map, phys_cpu); - if (num_processors >= NR_CPUS) { printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." " Processor ignored.\n", NR_CPUS); @@ -1512,17 +1508,19 @@ void __cpuinit generic_processor_info(int apicid, int version) cpus_complement(tmp_map, cpu_present_map); cpu = first_cpu(tmp_map); - if (apicid == boot_cpu_physical_apicid) + physid_set(apicid, phys_cpu_present_map); + if (apicid == boot_cpu_physical_apicid) { /* * x86_bios_cpu_apicid is required to have processors listed * in same order as logical cpu numbers. Hence the first * entry is BSP, and so on. */ cpu = 0; - + } if (apicid > max_physical_apicid) max_physical_apicid = apicid; +#ifdef CONFIG_X86_32 /* * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y * but we need to work other dependencies like SMP_SUSPEND etc @@ -1542,7 +1540,9 @@ void __cpuinit generic_processor_info(int apicid, int version) def_to_bigsmp = 1; } } -#ifdef CONFIG_SMP +#endif + +#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64) /* are we being called early in kernel startup? */ if (early_per_cpu_ptr(x86_cpu_to_apicid)) { u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); @@ -1555,6 +1555,7 @@ void __cpuinit generic_processor_info(int apicid, int version) per_cpu(x86_bios_cpu_apicid, cpu) = apicid; } #endif + cpu_set(cpu, cpu_possible_map); cpu_set(cpu, cpu_present_map); } diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 0d96910..76c2077 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1215,6 +1215,8 @@ void __init init_apic_mappings(void) * This initializes the IO-APIC and APIC hardware if this is * a UP kernel. */ +int apic_version[MAX_APICS]; + int __init APIC_init_uniprocessor(void) { if (disable_apic) { @@ -1409,15 +1411,26 @@ void __cpuinit generic_processor_info(int apicid, int version) int cpu; cpumask_t tmp_map; + /* + * Validate version + */ + if (version == 0x0) { + printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! " + "fixing up to 0x10. (tell your hw vendor)\n", + version); + version = 0x10; + } + apic_version[apicid] = version; + if (num_processors >= NR_CPUS) { printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." - " Processor ignored.\n", NR_CPUS); + " Processor ignored.\n", NR_CPUS); return; } if (num_processors >= maxcpus) { printk(KERN_WARNING "WARNING: maxcpus limit of %i reached." - " Processor ignored.\n", maxcpus); + " Processor ignored.\n", maxcpus); return; } @@ -1437,6 +1450,29 @@ void __cpuinit generic_processor_info(int apicid, int version) if (apicid > max_physical_apicid) max_physical_apicid = apicid; +#ifdef CONFIG_X86_32 + /* + * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y + * but we need to work other dependencies like SMP_SUSPEND etc + * before this can be done without some confusion. + * if (CPU_HOTPLUG_ENABLED || num_processors > 8) + * - Ashok Raj + */ + if (max_physical_apicid >= 8) { + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_INTEL: + if (!APIC_XAPIC(version)) { + def_to_bigsmp = 0; + break; + } + /* If P4 and above fall through */ + case X86_VENDOR_AMD: + def_to_bigsmp = 1; + } + } +#endif + +#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64) /* are we being called early in kernel startup? */ if (early_per_cpu_ptr(x86_cpu_to_apicid)) { u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); @@ -1448,6 +1484,7 @@ void __cpuinit generic_processor_info(int apicid, int version) per_cpu(x86_cpu_to_apicid, cpu) = apicid; per_cpu(x86_bios_cpu_apicid, cpu) = apicid; } +#endif cpu_set(cpu, cpu_possible_map); cpu_set(cpu, cpu_present_map); -- cgit v0.10.2 From fa6b95fc7c6f2f3eb1560e1f33cd13197546c5a0 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:45:58 +0400 Subject: x86: apic - unify end_local_APIC_setup Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index c3a252b..f188232 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1155,13 +1155,15 @@ void __cpuinit setup_local_APIC(void) void __cpuinit end_local_APIC_setup(void) { - unsigned long value; - lapic_setup_esr(); + +#ifdef CONFIG_X86_32 + unsigned int value; /* Disable the local apic timer */ value = apic_read(APIC_LVTT); value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); apic_write(APIC_LVTT, value); +#endif setup_apic_nmi_watchdog(NULL); apic_pm_activate(); diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 76c2077..eec10b3 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1014,6 +1014,15 @@ void __cpuinit setup_local_APIC(void) void __cpuinit end_local_APIC_setup(void) { lapic_setup_esr(); + +#ifdef CONFIG_X86_32 + unsigned int value; + /* Disable the local apic timer */ + value = apic_read(APIC_LVTT); + value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); + apic_write(APIC_LVTT, value); +#endif + setup_apic_nmi_watchdog(NULL); apic_pm_activate(); } -- cgit v0.10.2 From 0b23e8cf553f5e706b0057363f1319867bcd1a7d Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:45:59 +0400 Subject: x86: apic - unify local_apic_timer_interrupt Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index f188232..af227bc 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -685,7 +685,11 @@ static void local_apic_timer_interrupt(void) /* * the NMI deadlock-detector uses this. */ +#ifdef CONFIG_X86_64 + add_pda(apic_timer_irqs, 1); +#else per_cpu(irq_stat, cpu).apic_timer_irqs++; +#endif evt->event_handler(evt); } diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index eec10b3..a9ad2cb 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -567,7 +567,11 @@ static void local_apic_timer_interrupt(void) /* * the NMI deadlock-detector uses this. */ +#ifdef CONFIG_X86_64 add_pda(apic_timer_irqs, 1); +#else + per_cpu(irq_stat, cpu).apic_timer_irqs++; +#endif evt->event_handler(evt); } -- cgit v0.10.2 From 79af9bec60e6e218a1e48e8830d603d64a7fc441 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:46:00 +0400 Subject: x86: apic - unify apic_set_verbosity Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index af227bc..acbc4de 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1768,13 +1768,24 @@ early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); static int __init apic_set_verbosity(char *arg) { - if (!arg) + if (!arg) { +#ifdef CONFIG_X86_64 + skip_ioapic_setup = 0; + ioapic_force = 1; + return 0; +#endif return -EINVAL; + } - if (strcmp(arg, "debug") == 0) + if (strcmp("debug", arg) == 0) apic_verbosity = APIC_DEBUG; - else if (strcmp(arg, "verbose") == 0) + else if (strcmp("verbose", arg) == 0) apic_verbosity = APIC_VERBOSE; + else { + printk(KERN_WARNING "APIC Verbosity level %s not recognised" + " use apic=verbose or apic=debug\n", arg); + return -EINVAL; + } return 0; } diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index a9ad2cb..9997818 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1759,27 +1759,6 @@ early_param("nox2apic", setup_nox2apic); /* * APIC command line parameters */ -static int __init apic_set_verbosity(char *str) -{ - if (str == NULL) { - skip_ioapic_setup = 0; - ioapic_force = 1; - return 0; - } - if (strcmp("debug", str) == 0) - apic_verbosity = APIC_DEBUG; - else if (strcmp("verbose", str) == 0) - apic_verbosity = APIC_VERBOSE; - else { - printk(KERN_WARNING "APIC Verbosity level %s not recognised" - " use apic=verbose or apic=debug\n", str); - return -EINVAL; - } - - return 0; -} -early_param("apic", apic_set_verbosity); - static __init int setup_disableapic(char *str) { disable_apic = 1; @@ -1824,6 +1803,31 @@ static __init int setup_apicpmtimer(char *s) } __setup("apicpmtimer", setup_apicpmtimer); +static int __init apic_set_verbosity(char *arg) +{ + if (!arg) { +#ifdef CONFIG_X86_64 + skip_ioapic_setup = 0; + ioapic_force = 1; + return 0; +#endif + return -EINVAL; + } + + if (strcmp("debug", arg) == 0) + apic_verbosity = APIC_DEBUG; + else if (strcmp("verbose", arg) == 0) + apic_verbosity = APIC_VERBOSE; + else { + printk(KERN_WARNING "APIC Verbosity level %s not recognised" + " use apic=verbose or apic=debug\n", arg); + return -EINVAL; + } + + return 0; +} +early_param("apic", apic_set_verbosity); + static int __init lapic_insert_resource(void) { if (!apic_phys) -- cgit v0.10.2 From 789fa735712d726b5cfa5c6be57171b5637a4872 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:46:01 +0400 Subject: x86: apic - unify disableapic and nolapic setup handlers Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index acbc4de..5680bda 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1737,13 +1737,20 @@ static int __init parse_lapic(char *arg) } early_param("lapic", parse_lapic); -static int __init parse_nolapic(char *arg) +static int __init setup_disableapic(char *arg) { disable_apic = 1; setup_clear_cpu_cap(X86_FEATURE_APIC); return 0; } -early_param("nolapic", parse_nolapic); +early_param("disableapic", setup_disableapic); + +/* same as disableapic, for compatibility */ +static int __init setup_nolapic(char *arg) +{ + return setup_disableapic(arg); +} +early_param("nolapic", setup_nolapic); static int __init parse_disable_apic_timer(char *arg) { diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 9997818..7a31718 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1759,7 +1759,7 @@ early_param("nox2apic", setup_nox2apic); /* * APIC command line parameters */ -static __init int setup_disableapic(char *str) +static int __init setup_disableapic(char *arg) { disable_apic = 1; setup_clear_cpu_cap(X86_FEATURE_APIC); @@ -1768,9 +1768,9 @@ static __init int setup_disableapic(char *str) early_param("disableapic", setup_disableapic); /* same as disableapic, for compatibility */ -static __init int setup_nolapic(char *str) +static int __init setup_nolapic(char *arg) { - return setup_disableapic(str); + return setup_disableapic(arg); } early_param("nolapic", setup_nolapic); -- cgit v0.10.2 From 3415610b8e38b26b52a430b8846665c2d6bb3558 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:46:02 +0400 Subject: x86: apic - rearrange parse_lapic_timer_c2_ok Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 5680bda..885e306 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1752,6 +1752,13 @@ static int __init setup_nolapic(char *arg) } early_param("nolapic", setup_nolapic); +static int __init parse_lapic_timer_c2_ok(char *arg) +{ + local_apic_timer_c2_ok = 1; + return 0; +} +early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); + static int __init parse_disable_apic_timer(char *arg) { disable_apic_timer = 1; @@ -1766,13 +1773,6 @@ static int __init parse_nolapic_timer(char *arg) } early_param("nolapic_timer", parse_nolapic_timer); -static int __init parse_lapic_timer_c2_ok(char *arg) -{ - local_apic_timer_c2_ok = 1; - return 0; -} -early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); - static int __init apic_set_verbosity(char *arg) { if (!arg) { -- cgit v0.10.2 From e75bedf415f300a08e9bbcc755784e488574a73e Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:46:03 +0400 Subject: x86: apic - lapic_resume 32bit - unification fix Just add parenthesis to be identical of current 64bit implementation (so diff will not complain). Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 885e306..e975562 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1646,6 +1646,7 @@ static int lapic_resume(struct sys_device *dev) enable_x2apic(); else #endif + { /* * Make sure the APICBASE points to the right address * @@ -1656,6 +1657,7 @@ static int lapic_resume(struct sys_device *dev) l &= ~MSR_IA32_APICBASE_BASE; l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; wrmsr(MSR_IA32_APICBASE, l, h); + } apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); apic_write(APIC_ID, apic_pm_state.apic_id); -- cgit v0.10.2 From 1b4ee4e4096d430c4c12516c1d30a6b0b4f9e9e4 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 23:12:33 +0400 Subject: x86: apic - compilation warnings fix Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index e975562..b8d80c2 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1162,11 +1162,13 @@ void __cpuinit end_local_APIC_setup(void) lapic_setup_esr(); #ifdef CONFIG_X86_32 - unsigned int value; - /* Disable the local apic timer */ - value = apic_read(APIC_LVTT); - value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); - apic_write(APIC_LVTT, value); + { + unsigned int value; + /* Disable the local apic timer */ + value = apic_read(APIC_LVTT); + value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); + apic_write(APIC_LVTT, value); + } #endif setup_apic_nmi_watchdog(NULL); @@ -1426,6 +1428,8 @@ void __init connect_bsp_APIC(void) */ void disconnect_bsp_APIC(int virt_wire_setup) { + unsigned int value; + #ifdef CONFIG_X86_32 if (pic_mode) { /* @@ -1443,7 +1447,6 @@ void disconnect_bsp_APIC(int virt_wire_setup) #endif /* Go back to Virtual Wire compatibility mode */ - unsigned int value; /* For the spurious interrupt use vector F, and enable it */ value = apic_read(APIC_SPIV); diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 7a31718..37e0376 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1020,11 +1020,13 @@ void __cpuinit end_local_APIC_setup(void) lapic_setup_esr(); #ifdef CONFIG_X86_32 - unsigned int value; - /* Disable the local apic timer */ - value = apic_read(APIC_LVTT); - value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); - apic_write(APIC_LVTT, value); + { + unsigned int value; + /* Disable the local apic timer */ + value = apic_read(APIC_LVTT); + value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); + apic_write(APIC_LVTT, value); + } #endif setup_apic_nmi_watchdog(NULL); @@ -1363,6 +1365,8 @@ void __init connect_bsp_APIC(void) */ void disconnect_bsp_APIC(int virt_wire_setup) { + unsigned int value; + #ifdef CONFIG_X86_32 if (pic_mode) { /* @@ -1380,7 +1384,6 @@ void disconnect_bsp_APIC(int virt_wire_setup) #endif /* Go back to Virtual Wire compatibility mode */ - unsigned int value; /* For the spurious interrupt use vector F, and enable it */ value = apic_read(APIC_SPIV); -- cgit v0.10.2 From 467cd0529a480c9c25f06154b788d1d1ba77828a Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Mon, 18 Aug 2008 16:56:29 -0700 Subject: x86: early_printk.c trivial sparse fixes arch/x86/kernel/early_printk.c:404:13: warning: incorrect type in assignment (different base types) arch/x86/kernel/early_printk.c:404:13: expected restricted __le16 [assigned] [usertype] wValue arch/x86/kernel/early_printk.c:404:13: got int [signed] value arch/x86/kernel/early_printk.c:405:13: warning: incorrect type in assignment (different base types) arch/x86/kernel/early_printk.c:405:13: expected restricted __le16 [assigned] [usertype] wIndex arch/x86/kernel/early_printk.c:405:13: got int [signed] index arch/x86/kernel/early_printk.c:406:14: warning: incorrect type in assignment (different base types) arch/x86/kernel/early_printk.c:406:14: expected restricted __le16 [assigned] [usertype] wLength arch/x86/kernel/early_printk.c:406:14: got int [signed] size arch/x86/kernel/early_printk.c:845:16: warning: Using plain integer as NULL pointer arch/x86/kernel/early_printk.c:992:13: warning: symbol 'enable_debug_console' was not declared. Should it be static? Signed-off-by: Harvey Harrison Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index 28155ac..825e913 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -401,9 +401,9 @@ static int dbgp_control_msg(unsigned devnum, int requesttype, int request, /* Compute the control message */ req.bRequestType = requesttype; req.bRequest = request; - req.wValue = value; - req.wIndex = index; - req.wLength = size; + req.wValue = cpu_to_le16(value); + req.wIndex = cpu_to_le16(index); + req.wLength = cpu_to_le16(size); pids = DBGP_PID_SET(USB_PID_DATA0, USB_PID_SETUP); addr = DBGP_EPADDR(devnum, 0); @@ -842,7 +842,7 @@ static int __init early_dbgp_init(char *s) ret = ehci_setup(); if (ret < 0) { dbgp_printk("ehci_setup failed\n"); - ehci_debug = 0; + ehci_debug = NULL; return -1; } @@ -989,7 +989,7 @@ static int __init setup_early_printk(char *buf) return 0; } -void __init enable_debug_console(char *buf) +static void __init enable_debug_console(char *buf) { #ifdef DBGP_DEBUG struct console *old_early_console = NULL; -- cgit v0.10.2 From e2fe16d91228a005811335fbc4fbad5d4f5b75af Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 15 Aug 2008 15:36:31 -0700 Subject: x86: boot: stub out unimplemented CPU feature words The CPU feature detection code in the boot code is somewhat minimal, and doesn't include all possible CPUID words. In particular, it doesn't contain the code for CPU feature words 2 (Transmeta), 3 (Linux-specific), 5 (VIA), or 7 (scattered). Zero them out, so we can still set those bits as known at compile time; in particular, this allows creating a Linux-specific NOPL flag and have it required (and therefore resolvable at compile time) in 64-bit mode. Signed-off-by: H. Peter Anvin diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c index 7804389..19b14f7 100644 --- a/arch/x86/boot/cpucheck.c +++ b/arch/x86/boot/cpucheck.c @@ -46,12 +46,12 @@ static const u32 req_flags[NCAPINTS] = { REQUIRED_MASK0, REQUIRED_MASK1, - REQUIRED_MASK2, - REQUIRED_MASK3, + 0, /* REQUIRED_MASK2 not implemented in this file */ + 0, /* REQUIRED_MASK3 not implemented in this file */ REQUIRED_MASK4, - REQUIRED_MASK5, + 0, /* REQUIRED_MASK5 not implemented in this file */ REQUIRED_MASK6, - REQUIRED_MASK7, + 0, /* REQUIRED_MASK7 not implemented in this file */ }; #define A32(a, b, c, d) (((d) << 24)+((c) << 16)+((b) << 8)+(a)) -- cgit v0.10.2 From 7e00df5818964298c9821365a6cb7a8304227c5c Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 18 Aug 2008 17:39:32 -0700 Subject: x86: add NOPL as a synthetic CPU feature bit The long noops ("NOPL") are supposed to be detected by family >= 6. Unfortunately, several non-Intel x86 implementations, both hardware and software, don't obey this dictum. Instead, probe for NOPL directly by executing a NOPL instruction and see if we get #UD. Signed-off-by: H. Peter Anvin diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 80ab20d..0785b3c 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -13,6 +13,7 @@ #include #include #include +#include #ifdef CONFIG_X86_LOCAL_APIC #include #include @@ -341,6 +342,35 @@ static void __init early_cpu_detect(void) early_get_cap(c); } +/* + * The NOPL instruction is supposed to exist on all CPUs with + * family >= 6, unfortunately, that's not true in practice because + * of early VIA chips and (more importantly) broken virtualizers that + * are not easy to detect. Hence, probe for it based on first + * principles. + */ +static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) +{ + const u32 nopl_signature = 0x888c53b1; /* Random number */ + u32 has_nopl = nopl_signature; + + clear_cpu_cap(c, X86_FEATURE_NOPL); + if (c->x86 >= 6) { + asm volatile("\n" + "1: .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */ + "2:\n" + " .section .fixup,\"ax\"\n" + "3: xor %0,%0\n" + " jmp 2b\n" + " .previous\n" + _ASM_EXTABLE(1b,3b) + : "+a" (has_nopl)); + + if (has_nopl == nopl_signature) + set_cpu_cap(c, X86_FEATURE_NOPL); + } +} + static void __cpuinit generic_identify(struct cpuinfo_x86 *c) { u32 tfms, xlvl; @@ -395,8 +425,8 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) } init_scattered_cpuid_features(c); + detect_nopl(c); } - } static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index dd6e3f1..c3afba5 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #ifdef CONFIG_X86_LOCAL_APIC #include @@ -215,6 +216,39 @@ static void __init early_cpu_support_print(void) } } +/* + * The NOPL instruction is supposed to exist on all CPUs with + * family >= 6, unfortunately, that's not true in practice because + * of early VIA chips and (more importantly) broken virtualizers that + * are not easy to detect. Hence, probe for it based on first + * principles. + * + * Note: no 64-bit chip is known to lack these, but put the code here + * for consistency with 32 bits, and to make it utterly trivial to + * diagnose the problem should it ever surface. + */ +static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) +{ + const u32 nopl_signature = 0x888c53b1; /* Random number */ + u32 has_nopl = nopl_signature; + + clear_cpu_cap(c, X86_FEATURE_NOPL); + if (c->x86 >= 6) { + asm volatile("\n" + "1: .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */ + "2:\n" + " .section .fixup,\"ax\"\n" + "3: xor %0,%0\n" + " jmp 2b\n" + " .previous\n" + _ASM_EXTABLE(1b,3b) + : "+a" (has_nopl)); + + if (has_nopl == nopl_signature) + set_cpu_cap(c, X86_FEATURE_NOPL); + } +} + static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c); void __init early_cpu_init(void) @@ -313,6 +347,8 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) c->x86_phys_bits = eax & 0xff; } + detect_nopl(c); + if (c->x86_vendor != X86_VENDOR_UNKNOWN && cpu_devs[c->x86_vendor]->c_early_init) cpu_devs[c->x86_vendor]->c_early_init(c); diff --git a/arch/x86/kernel/cpu/feature_names.c b/arch/x86/kernel/cpu/feature_names.c index e43ad4a..c901779 100644 --- a/arch/x86/kernel/cpu/feature_names.c +++ b/arch/x86/kernel/cpu/feature_names.c @@ -39,7 +39,8 @@ const char * const x86_cap_flags[NCAPINTS*32] = { NULL, NULL, NULL, NULL, "constant_tsc", "up", NULL, "arch_perfmon", "pebs", "bts", NULL, NULL, - "rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL, + "rep_good", NULL, NULL, NULL, + "nopl", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* Intel-defined (#2) */ diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h index 2f5a792..be8b2ad 100644 --- a/include/asm-x86/cpufeature.h +++ b/include/asm-x86/cpufeature.h @@ -72,14 +72,15 @@ #define X86_FEATURE_UP (3*32+ 9) /* smp kernel running on up */ #define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* FXSAVE leaks FOP/FIP/FOP */ #define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */ -#define X86_FEATURE_PEBS (3*32+12) /* Precise-Event Based Sampling */ -#define X86_FEATURE_BTS (3*32+13) /* Branch Trace Store */ -#define X86_FEATURE_SYSCALL32 (3*32+14) /* syscall in ia32 userspace */ -#define X86_FEATURE_SYSENTER32 (3*32+15) /* sysenter in ia32 userspace */ +#define X86_FEATURE_PEBS (3*32+12) /* Precise-Event Based Sampling */ +#define X86_FEATURE_BTS (3*32+13) /* Branch Trace Store */ +#define X86_FEATURE_SYSCALL32 (3*32+14) /* syscall in ia32 userspace */ +#define X86_FEATURE_SYSENTER32 (3*32+15) /* sysenter in ia32 userspace */ #define X86_FEATURE_REP_GOOD (3*32+16) /* rep microcode works well on this CPU */ #define X86_FEATURE_MFENCE_RDTSC (3*32+17) /* Mfence synchronizes RDTSC */ #define X86_FEATURE_LFENCE_RDTSC (3*32+18) /* Lfence synchronizes RDTSC */ -#define X86_FEATURE_11AP (3*32+19) /* Bad local APIC aka 11AP */ +#define X86_FEATURE_11AP (3*32+19) /* Bad local APIC aka 11AP */ +#define X86_FEATURE_NOPL (3*32+20) /* The NOPL (0F 1F) instructions */ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ #define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */ diff --git a/include/asm-x86/required-features.h b/include/asm-x86/required-features.h index adec887..5c2ff4b 100644 --- a/include/asm-x86/required-features.h +++ b/include/asm-x86/required-features.h @@ -41,6 +41,12 @@ # define NEED_3DNOW 0 #endif +#if defined(CONFIG_X86_P6_NOP) || defined(CONFIG_X86_64) +# define NEED_NOPL (1<<(X86_FEATURE_NOPL & 31)) +#else +# define NEED_NOPL 0 +#endif + #ifdef CONFIG_X86_64 #define NEED_PSE 0 #define NEED_MSR (1<<(X86_FEATURE_MSR & 31)) @@ -67,7 +73,7 @@ #define REQUIRED_MASK1 (NEED_LM|NEED_3DNOW) #define REQUIRED_MASK2 0 -#define REQUIRED_MASK3 0 +#define REQUIRED_MASK3 (NEED_NOPL) #define REQUIRED_MASK4 0 #define REQUIRED_MASK5 0 #define REQUIRED_MASK6 0 -- cgit v0.10.2 From f1c5d30e1d79bbfb60eaf189db862d3cb2bcac92 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 18 Aug 2008 17:50:33 -0700 Subject: x86: use X86_FEATURE_NOPL in alternatives Use X86_FEATURE_NOPL to determine if it is safe to use P6 NOPs in alternatives. Also, replace table and loop with simple if statement. Signed-off-by: H. Peter Anvin diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 2763cb3..65a0c1b 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -145,35 +145,25 @@ static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = { extern char __vsyscall_0; const unsigned char *const *find_nop_table(void) { - return boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || - boot_cpu_data.x86 < 6 ? k8_nops : p6_nops; + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && + boot_cpu_has(X86_FEATURE_NOPL)) + return p6_nops; + else + return k8_nops; } #else /* CONFIG_X86_64 */ -static const struct nop { - int cpuid; - const unsigned char *const *noptable; -} noptypes[] = { - { X86_FEATURE_K8, k8_nops }, - { X86_FEATURE_K7, k7_nops }, - { X86_FEATURE_P4, p6_nops }, - { X86_FEATURE_P3, p6_nops }, - { -1, NULL } -}; - const unsigned char *const *find_nop_table(void) { - const unsigned char *const *noptable = intel_nops; - int i; - - for (i = 0; noptypes[i].cpuid >= 0; i++) { - if (boot_cpu_has(noptypes[i].cpuid)) { - noptable = noptypes[i].noptable; - break; - } - } - return noptable; + if (boot_cpu_has(X86_FEATURE_K8)) + return k8_nops; + else if (boot_cpu_has(X86_FEATURE_K7)) + return k7_nops; + else if (boot_cpu_has(X86_FEATURE_NOPL)) + return p6_nops; + else + return intel_nops; } #endif /* CONFIG_X86_64 */ -- cgit v0.10.2 From 8df9676d6402563da91427e8d9f2da8a4598aede Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 18 Aug 2008 18:13:33 -0700 Subject: x86: consistency cleanups Rename _ASM_MOV_UL to _ASM_MOV for consistency with other _ASM_ instructions (_ASM_ADD, _ASM_SUB and so on.) Add ASM_SP, _ASM_BP, _ASM_SI, and _ASM_DI for consistency with _ASM_[ABCD]X. Signed-off-by: H. Peter Anvin diff --git a/include/asm-x86/asm.h b/include/asm-x86/asm.h index 9722032..56be78f 100644 --- a/include/asm-x86/asm.h +++ b/include/asm-x86/asm.h @@ -20,17 +20,22 @@ #define _ASM_PTR __ASM_SEL(.long, .quad) #define _ASM_ALIGN __ASM_SEL(.balign 4, .balign 8) -#define _ASM_MOV_UL __ASM_SIZE(mov) +#define _ASM_MOV __ASM_SIZE(mov) #define _ASM_INC __ASM_SIZE(inc) #define _ASM_DEC __ASM_SIZE(dec) #define _ASM_ADD __ASM_SIZE(add) #define _ASM_SUB __ASM_SIZE(sub) #define _ASM_XADD __ASM_SIZE(xadd) + #define _ASM_AX __ASM_REG(ax) #define _ASM_BX __ASM_REG(bx) #define _ASM_CX __ASM_REG(cx) #define _ASM_DX __ASM_REG(dx) +#define _ASM_SP __ASM_REG(sp) +#define _ASM_BP __ASM_REG(bp) +#define _ASM_SI __ASM_REG(si) +#define _ASM_DI __ASM_REG(di) /* Exception table entry */ # define _ASM_EXTABLE(from,to) \ diff --git a/include/asm-x86/resume-trace.h b/include/asm-x86/resume-trace.h index 8d9f0b4..e219b4c 100644 --- a/include/asm-x86/resume-trace.h +++ b/include/asm-x86/resume-trace.h @@ -7,7 +7,7 @@ do { \ if (pm_trace_enabled) { \ const void *tracedata; \ - asm volatile(_ASM_MOV_UL " $1f,%0\n" \ + asm volatile(_ASM_MOV " $1f,%0\n" \ ".section .tracedata,\"a\"\n" \ "1:\t.word %c1\n\t" \ _ASM_PTR " %c2\n" \ -- cgit v0.10.2 From 20211e4d344729f4d4c93da37a590fc1c3a1fd9b Mon Sep 17 00:00:00 2001 From: Paolo Ciarrocchi Date: Mon, 18 Aug 2008 21:25:38 +0200 Subject: x86: Coding style fixes to arch/x86/oprofile/op_model_p4.c A coding style patch to arch/x86/oprofile/op_model_p4.c that removes 87 errors and 4 warnings. Before: total: 89 errors, 13 warnings, 722 lines checked After: total: 2 errors, 9 warnings, 721 lines checked Compile tested, binary verified as follow: paolo@paolo-desktop:~/linux.trees.git$ size /tmp/op_model_p4.o.* text data bss dec hex filename 2691 968 32 3691 e6b /tmp/op_model_p4.o.after 2691 968 32 3691 e6b /tmp/op_model_p4.o.before paolo@paolo-desktop:~/linux.trees.git$ md5sum /tmp/op_model_p4.o.* 8c1c9823bab33333e1f7f76574e62561 /tmp/op_model_p4.o.after 8c1c9823bab33333e1f7f76574e62561 /tmp/op_model_p4.o.before Signed-off-by: Paolo Ciarrocchi Cc: robert.richter@amd.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index 56b4757..43ac5af 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c @@ -10,11 +10,12 @@ #include #include +#include +#include #include -#include #include #include -#include + #include "op_x86_model.h" #include "op_counter.h" @@ -40,7 +41,7 @@ static unsigned int num_controls = NUM_CONTROLS_NON_HT; static inline void setup_num_counters(void) { #ifdef CONFIG_SMP - if (smp_num_siblings == 2){ + if (smp_num_siblings == 2) { num_counters = NUM_COUNTERS_HT2; num_controls = NUM_CONTROLS_HT2; } @@ -86,7 +87,7 @@ struct p4_event_binding { #define CTR_FLAME_2 (1 << 6) #define CTR_IQ_5 (1 << 7) -static struct p4_counter_binding p4_counters [NUM_COUNTERS_NON_HT] = { +static struct p4_counter_binding p4_counters[NUM_COUNTERS_NON_HT] = { { CTR_BPU_0, MSR_P4_BPU_PERFCTR0, MSR_P4_BPU_CCCR0 }, { CTR_MS_0, MSR_P4_MS_PERFCTR0, MSR_P4_MS_CCCR0 }, { CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 }, @@ -97,32 +98,32 @@ static struct p4_counter_binding p4_counters [NUM_COUNTERS_NON_HT] = { { CTR_IQ_5, MSR_P4_IQ_PERFCTR5, MSR_P4_IQ_CCCR5 } }; -#define NUM_UNUSED_CCCRS NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT +#define NUM_UNUSED_CCCRS (NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT) /* p4 event codes in libop/op_event.h are indices into this table. */ static struct p4_event_binding p4_events[NUM_EVENTS] = { - + { /* BRANCH_RETIRED */ - 0x05, 0x06, + 0x05, 0x06, { {CTR_IQ_4, MSR_P4_CRU_ESCR2}, {CTR_IQ_5, MSR_P4_CRU_ESCR3} } }, - + { /* MISPRED_BRANCH_RETIRED */ - 0x04, 0x03, + 0x04, 0x03, { { CTR_IQ_4, MSR_P4_CRU_ESCR0}, { CTR_IQ_5, MSR_P4_CRU_ESCR1} } }, - + { /* TC_DELIVER_MODE */ 0x01, 0x01, - { { CTR_MS_0, MSR_P4_TC_ESCR0}, + { { CTR_MS_0, MSR_P4_TC_ESCR0}, { CTR_MS_2, MSR_P4_TC_ESCR1} } }, - + { /* BPU_FETCH_REQUEST */ - 0x00, 0x03, + 0x00, 0x03, { { CTR_BPU_0, MSR_P4_BPU_ESCR0}, { CTR_BPU_2, MSR_P4_BPU_ESCR1} } }, @@ -146,7 +147,7 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = { }, { /* LOAD_PORT_REPLAY */ - 0x02, 0x04, + 0x02, 0x04, { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0}, { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} } }, @@ -170,43 +171,43 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = { }, { /* BSQ_CACHE_REFERENCE */ - 0x07, 0x0c, + 0x07, 0x0c, { { CTR_BPU_0, MSR_P4_BSU_ESCR0}, { CTR_BPU_2, MSR_P4_BSU_ESCR1} } }, { /* IOQ_ALLOCATION */ - 0x06, 0x03, + 0x06, 0x03, { { CTR_BPU_0, MSR_P4_FSB_ESCR0}, { 0, 0 } } }, { /* IOQ_ACTIVE_ENTRIES */ - 0x06, 0x1a, + 0x06, 0x1a, { { CTR_BPU_2, MSR_P4_FSB_ESCR1}, { 0, 0 } } }, { /* FSB_DATA_ACTIVITY */ - 0x06, 0x17, + 0x06, 0x17, { { CTR_BPU_0, MSR_P4_FSB_ESCR0}, { CTR_BPU_2, MSR_P4_FSB_ESCR1} } }, { /* BSQ_ALLOCATION */ - 0x07, 0x05, + 0x07, 0x05, { { CTR_BPU_0, MSR_P4_BSU_ESCR0}, { 0, 0 } } }, { /* BSQ_ACTIVE_ENTRIES */ 0x07, 0x06, - { { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */}, + { { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */}, { 0, 0 } } }, { /* X87_ASSIST */ - 0x05, 0x03, + 0x05, 0x03, { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, { CTR_IQ_5, MSR_P4_CRU_ESCR3} } }, @@ -216,21 +217,21 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = { { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } }, - + { /* PACKED_SP_UOP */ - 0x01, 0x08, + 0x01, 0x08, { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } }, - + { /* PACKED_DP_UOP */ - 0x01, 0x0c, + 0x01, 0x0c, { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } }, { /* SCALAR_SP_UOP */ - 0x01, 0x0a, + 0x01, 0x0a, { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } }, @@ -242,31 +243,31 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = { }, { /* 64BIT_MMX_UOP */ - 0x01, 0x02, + 0x01, 0x02, { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } }, - + { /* 128BIT_MMX_UOP */ - 0x01, 0x1a, + 0x01, 0x1a, { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } }, { /* X87_FP_UOP */ - 0x01, 0x04, + 0x01, 0x04, { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } }, - + { /* X87_SIMD_MOVES_UOP */ - 0x01, 0x2e, + 0x01, 0x2e, { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } }, - + { /* MACHINE_CLEAR */ - 0x05, 0x02, + 0x05, 0x02, { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, { CTR_IQ_5, MSR_P4_CRU_ESCR3} } }, @@ -276,9 +277,9 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = { { { CTR_BPU_0, MSR_P4_FSB_ESCR0}, { CTR_BPU_2, MSR_P4_FSB_ESCR1} } }, - + { /* TC_MS_XFER */ - 0x00, 0x05, + 0x00, 0x05, { { CTR_MS_0, MSR_P4_MS_ESCR0}, { CTR_MS_2, MSR_P4_MS_ESCR1} } }, @@ -308,7 +309,7 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = { }, { /* INSTR_RETIRED */ - 0x04, 0x02, + 0x04, 0x02, { { CTR_IQ_4, MSR_P4_CRU_ESCR0}, { CTR_IQ_5, MSR_P4_CRU_ESCR1} } }, @@ -319,14 +320,14 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = { { CTR_IQ_5, MSR_P4_CRU_ESCR1} } }, - { /* UOP_TYPE */ - 0x02, 0x02, + { /* UOP_TYPE */ + 0x02, 0x02, { { CTR_IQ_4, MSR_P4_RAT_ESCR0}, { CTR_IQ_5, MSR_P4_RAT_ESCR1} } }, { /* RETIRED_MISPRED_BRANCH_TYPE */ - 0x02, 0x05, + 0x02, 0x05, { { CTR_MS_0, MSR_P4_TBPU_ESCR0}, { CTR_MS_2, MSR_P4_TBPU_ESCR1} } }, @@ -349,8 +350,8 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = { #define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1)) #define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25)) #define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9)) -#define ESCR_READ(escr,high,ev,i) do {rdmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0) -#define ESCR_WRITE(escr,high,ev,i) do {wrmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0) +#define ESCR_READ(escr, high, ev, i) do {rdmsr(ev->bindings[(i)].escr_address, (escr), (high)); } while (0) +#define ESCR_WRITE(escr, high, ev, i) do {wrmsr(ev->bindings[(i)].escr_address, (escr), (high)); } while (0) #define CCCR_RESERVED_BITS 0x38030FFF #define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS) @@ -360,15 +361,15 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = { #define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27)) #define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12)) #define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12)) -#define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0) -#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0) +#define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high)); } while (0) +#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high)); } while (0) #define CCCR_OVF_P(cccr) ((cccr) & (1U<<31)) #define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31))) -#define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0) -#define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0) -#define CTR_READ(l,h,i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h));} while (0) -#define CTR_WRITE(l,i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1);} while (0) +#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0) +#define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0) +#define CTR_READ(l, h, i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h)); } while (0) +#define CTR_WRITE(l, i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1); } while (0) #define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000)) @@ -380,7 +381,7 @@ static unsigned int get_stagger(void) #ifdef CONFIG_SMP int cpu = smp_processor_id(); return (cpu != first_cpu(per_cpu(cpu_sibling_map, cpu))); -#endif +#endif return 0; } @@ -395,25 +396,23 @@ static unsigned long reset_value[NUM_COUNTERS_NON_HT]; static void p4_fill_in_addresses(struct op_msrs * const msrs) { - unsigned int i; + unsigned int i; unsigned int addr, cccraddr, stag; setup_num_counters(); stag = get_stagger(); /* initialize some registers */ - for (i = 0; i < num_counters; ++i) { + for (i = 0; i < num_counters; ++i) msrs->counters[i].addr = 0; - } - for (i = 0; i < num_controls; ++i) { + for (i = 0; i < num_controls; ++i) msrs->controls[i].addr = 0; - } - + /* the counter & cccr registers we pay attention to */ for (i = 0; i < num_counters; ++i) { addr = p4_counters[VIRT_CTR(stag, i)].counter_address; cccraddr = p4_counters[VIRT_CTR(stag, i)].cccr_address; - if (reserve_perfctr_nmi(addr)){ + if (reserve_perfctr_nmi(addr)) { msrs->counters[i].addr = addr; msrs->controls[i].addr = cccraddr; } @@ -447,22 +446,22 @@ static void p4_fill_in_addresses(struct op_msrs * const msrs) if (reserve_evntsel_nmi(addr)) msrs->controls[i].addr = addr; } - + for (addr = MSR_P4_MS_ESCR0 + stag; - addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) { + addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) { if (reserve_evntsel_nmi(addr)) msrs->controls[i].addr = addr; } - + for (addr = MSR_P4_IX_ESCR0 + stag; - addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) { + addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) { if (reserve_evntsel_nmi(addr)) msrs->controls[i].addr = addr; } /* there are 2 remaining non-contiguously located ESCRs */ - if (num_counters == NUM_COUNTERS_NON_HT) { + if (num_counters == NUM_COUNTERS_NON_HT) { /* standard non-HT CPUs handle both remaining ESCRs*/ if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5)) msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; @@ -498,20 +497,20 @@ static void pmc_setup_one_p4_counter(unsigned int ctr) unsigned int stag; stag = get_stagger(); - + /* convert from counter *number* to counter *bit* */ counter_bit = 1 << VIRT_CTR(stag, ctr); - + /* find our event binding structure. */ if (counter_config[ctr].event <= 0 || counter_config[ctr].event > NUM_EVENTS) { - printk(KERN_ERR - "oprofile: P4 event code 0x%lx out of range\n", + printk(KERN_ERR + "oprofile: P4 event code 0x%lx out of range\n", counter_config[ctr].event); return; } - + ev = &(p4_events[counter_config[ctr].event - 1]); - + for (i = 0; i < maxbind; i++) { if (ev->bindings[i].virt_counter & counter_bit) { @@ -526,25 +525,24 @@ static void pmc_setup_one_p4_counter(unsigned int ctr) ESCR_SET_OS_1(escr, counter_config[ctr].kernel); } ESCR_SET_EVENT_SELECT(escr, ev->event_select); - ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask); + ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask); ESCR_WRITE(escr, high, ev, i); - + /* modify CCCR */ CCCR_READ(cccr, high, VIRT_CTR(stag, ctr)); CCCR_CLEAR(cccr); CCCR_SET_REQUIRED_BITS(cccr); CCCR_SET_ESCR_SELECT(cccr, ev->escr_select); - if (stag == 0) { + if (stag == 0) CCCR_SET_PMI_OVF_0(cccr); - } else { + else CCCR_SET_PMI_OVF_1(cccr); - } CCCR_WRITE(cccr, high, VIRT_CTR(stag, ctr)); return; } } - printk(KERN_ERR + printk(KERN_ERR "oprofile: P4 event code 0x%lx no binding, stag %d ctr %d\n", counter_config[ctr].event, stag, ctr); } @@ -559,14 +557,14 @@ static void p4_setup_ctrs(struct op_msrs const * const msrs) stag = get_stagger(); rdmsr(MSR_IA32_MISC_ENABLE, low, high); - if (! MISC_PMC_ENABLED_P(low)) { + if (!MISC_PMC_ENABLED_P(low)) { printk(KERN_ERR "oprofile: P4 PMC not available\n"); return; } /* clear the cccrs we will use */ for (i = 0 ; i < num_counters ; i++) { - if (unlikely(!CTRL_IS_RESERVED(msrs,i))) + if (unlikely(!CTRL_IS_RESERVED(msrs, i))) continue; rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); CCCR_CLEAR(low); @@ -576,14 +574,14 @@ static void p4_setup_ctrs(struct op_msrs const * const msrs) /* clear all escrs (including those outside our concern) */ for (i = num_counters; i < num_controls; i++) { - if (unlikely(!CTRL_IS_RESERVED(msrs,i))) + if (unlikely(!CTRL_IS_RESERVED(msrs, i))) continue; wrmsr(msrs->controls[i].addr, 0, 0); } /* setup all counters */ for (i = 0 ; i < num_counters ; ++i) { - if ((counter_config[i].enabled) && (CTRL_IS_RESERVED(msrs,i))) { + if ((counter_config[i].enabled) && (CTRL_IS_RESERVED(msrs, i))) { reset_value[i] = counter_config[i].count; pmc_setup_one_p4_counter(i); CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i)); @@ -603,11 +601,11 @@ static int p4_check_ctrs(struct pt_regs * const regs, stag = get_stagger(); for (i = 0; i < num_counters; ++i) { - - if (!reset_value[i]) + + if (!reset_value[i]) continue; - /* + /* * there is some eccentricity in the hardware which * requires that we perform 2 extra corrections: * @@ -616,24 +614,24 @@ static int p4_check_ctrs(struct pt_regs * const regs, * * - write the counter back twice to ensure it gets * updated properly. - * + * * the former seems to be related to extra NMIs happening * during the current NMI; the latter is reported as errata * N15 in intel doc 249199-029, pentium 4 specification * update, though their suggested work-around does not * appear to solve the problem. */ - + real = VIRT_CTR(stag, i); CCCR_READ(low, high, real); - CTR_READ(ctr, high, real); + CTR_READ(ctr, high, real); if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) { oprofile_add_sample(regs, i); - CTR_WRITE(reset_value[i], real); + CTR_WRITE(reset_value[i], real); CCCR_CLEAR_OVF(low); CCCR_WRITE(low, high, real); - CTR_WRITE(reset_value[i], real); + CTR_WRITE(reset_value[i], real); } } @@ -683,15 +681,16 @@ static void p4_shutdown(struct op_msrs const * const msrs) int i; for (i = 0 ; i < num_counters ; ++i) { - if (CTR_IS_RESERVED(msrs,i)) + if (CTR_IS_RESERVED(msrs, i)) release_perfctr_nmi(msrs->counters[i].addr); } - /* some of the control registers are specially reserved in + /* + * some of the control registers are specially reserved in * conjunction with the counter registers (hence the starting offset). * This saves a few bits. */ for (i = num_counters ; i < num_controls ; ++i) { - if (CTRL_IS_RESERVED(msrs,i)) + if (CTRL_IS_RESERVED(msrs, i)) release_evntsel_nmi(msrs->controls[i].addr); } } -- cgit v0.10.2 From eff9b713ee3540ddab862095aaf4b1511a6758bc Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 18 Aug 2008 17:51:08 -0700 Subject: rcu: fix locking cleanup fallout Given that the rcp->lock is now acquired from call_rcu(), which can be invoked from irq-disable regions, all acquisitions need to disable irqs. The following patch fixes this. Although I don't have any reason to believe that this is the cause of Yinghai's oops, it does need to be fixed. Signed-off-by: Paul E. McKenney Cc: Yinghai Lu Signed-off-by: Ingo Molnar diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c index fb1f1cc4..c6b6cf5 100644 --- a/kernel/rcuclassic.c +++ b/kernel/rcuclassic.c @@ -86,8 +86,10 @@ static void force_quiescent_state(struct rcu_data *rdp, { int cpu; cpumask_t cpumask; + unsigned long flags; + set_need_resched(); - spin_lock(&rcp->lock); + spin_lock_irqsave(&rcp->lock, flags); if (unlikely(!rcp->signaled)) { rcp->signaled = 1; /* @@ -113,7 +115,7 @@ static void force_quiescent_state(struct rcu_data *rdp, for_each_cpu_mask_nr(cpu, cpumask) smp_send_reschedule(cpu); } - spin_unlock(&rcp->lock); + spin_unlock_irqrestore(&rcp->lock, flags); } #else static inline void force_quiescent_state(struct rcu_data *rdp, @@ -301,17 +303,18 @@ static void print_other_cpu_stall(struct rcu_ctrlblk *rcp) { int cpu; long delta; + unsigned long flags; /* Only let one CPU complain about others per time interval. */ - spin_lock(&rcp->lock); + spin_lock_irqsave(&rcp->lock, flags); delta = get_seconds() - rcp->gp_check; if (delta < 2L || cpus_empty(rcp->cpumask)) { spin_unlock(&rcp->lock); return; } rcp->gp_check = get_seconds() + 30; - spin_unlock(&rcp->lock); + spin_unlock_irqrestore(&rcp->lock, flags); /* OK, time to rat on our buddy... */ @@ -324,13 +327,15 @@ static void print_other_cpu_stall(struct rcu_ctrlblk *rcp) static void print_cpu_stall(struct rcu_ctrlblk *rcp) { + unsigned long flags; + printk(KERN_ERR "RCU detected CPU %d stall (t=%lu/%lu)\n", smp_processor_id(), get_seconds(), rcp->gp_check); dump_stack(); - spin_lock(&rcp->lock); + spin_lock_irqsave(&rcp->lock, flags); if ((long)(get_seconds() - rcp->gp_check) >= 0L) rcp->gp_check = get_seconds() + 30; - spin_unlock(&rcp->lock); + spin_unlock_irqrestore(&rcp->lock, flags); } static void check_cpu_stall(struct rcu_ctrlblk *rcp, struct rcu_data *rdp) @@ -413,6 +418,8 @@ static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp) static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp, struct rcu_data *rdp) { + unsigned long flags; + if (rdp->quiescbatch != rcp->cur) { /* start new grace period: */ rdp->qs_pending = 1; @@ -436,7 +443,7 @@ static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp, return; rdp->qs_pending = 0; - spin_lock(&rcp->lock); + spin_lock_irqsave(&rcp->lock, flags); /* * rdp->quiescbatch/rcp->cur and the cpu bitmap can come out of sync * during cpu startup. Ignore the quiescent state. @@ -444,7 +451,7 @@ static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp, if (likely(rdp->quiescbatch == rcp->cur)) cpu_quiet(rdp->cpu, rcp); - spin_unlock(&rcp->lock); + spin_unlock_irqrestore(&rcp->lock, flags); } @@ -469,21 +476,22 @@ static void rcu_move_batch(struct rcu_data *this_rdp, struct rcu_head *list, static void __rcu_offline_cpu(struct rcu_data *this_rdp, struct rcu_ctrlblk *rcp, struct rcu_data *rdp) { + unsigned long flags; + /* * if the cpu going offline owns the grace period * we can block indefinitely waiting for it, so flush * it here */ - spin_lock_bh(&rcp->lock); + spin_lock_irqsave(&rcp->lock, flags); if (rcp->cur != rcp->completed) cpu_quiet(rdp->cpu, rcp); rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail, rcp->cur + 1); rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail[2], rcp->cur + 1); - spin_unlock_bh(&rcp->lock); + spin_unlock(&rcp->lock); - local_irq_disable(); this_rdp->qlen += rdp->qlen; - local_irq_enable(); + local_irq_restore(flags); } static void rcu_offline_cpu(int cpu) @@ -550,12 +558,12 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp, if (rcu_batch_after(rdp->batch, rcp->pending)) { /* and start it/schedule start if it's a new batch */ - spin_lock(&rcp->lock); + spin_lock_irqsave(&rcp->lock, flags); if (rcu_batch_after(rdp->batch, rcp->pending)) { rcp->pending = rdp->batch; rcu_start_batch(rcp); } - spin_unlock(&rcp->lock); + spin_unlock_irqrestore(&rcp->lock, flags); } } -- cgit v0.10.2 From 39ac50d0c79747b186c1268d9a488f8c1d256be7 Mon Sep 17 00:00:00 2001 From: Sven Wegener Date: Mon, 18 Aug 2008 00:52:08 +0200 Subject: ipvs: Fix race conditions in lblc scheduler We can't access the cache entry outside of our critical read-locked region, because someone may free that entry. And we also need to check in the critical region wether the destination is still available, i.e. it's not in the trash. If we drop our reference counter, the destination can be purged from the trash at any time. Our caller only guarantees that no destination is moved to the trash, while we are scheduling. Also there is no need for our own rwlock, there is already one in the service structure for use in the schedulers. Signed-off-by: Sven Wegener Signed-off-by: Simon Horman diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c index b9b334c..d2a43aa 100644 --- a/net/ipv4/ipvs/ip_vs_lblc.c +++ b/net/ipv4/ipvs/ip_vs_lblc.c @@ -96,7 +96,6 @@ struct ip_vs_lblc_entry { * IPVS lblc hash table */ struct ip_vs_lblc_table { - rwlock_t lock; /* lock for this table */ struct list_head bucket[IP_VS_LBLC_TAB_SIZE]; /* hash bucket */ atomic_t entries; /* number of entries */ int max_size; /* maximum size of entries */ @@ -123,31 +122,6 @@ static ctl_table vs_vars_table[] = { static struct ctl_table_header * sysctl_header; -/* - * new/free a ip_vs_lblc_entry, which is a mapping of a destionation - * IP address to a server. - */ -static inline struct ip_vs_lblc_entry * -ip_vs_lblc_new(__be32 daddr, struct ip_vs_dest *dest) -{ - struct ip_vs_lblc_entry *en; - - en = kmalloc(sizeof(struct ip_vs_lblc_entry), GFP_ATOMIC); - if (en == NULL) { - IP_VS_ERR("ip_vs_lblc_new(): no memory\n"); - return NULL; - } - - INIT_LIST_HEAD(&en->list); - en->addr = daddr; - - atomic_inc(&dest->refcnt); - en->dest = dest; - - return en; -} - - static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en) { list_del(&en->list); @@ -173,55 +147,66 @@ static inline unsigned ip_vs_lblc_hashkey(__be32 addr) * Hash an entry in the ip_vs_lblc_table. * returns bool success. */ -static int +static void ip_vs_lblc_hash(struct ip_vs_lblc_table *tbl, struct ip_vs_lblc_entry *en) { - unsigned hash; - - if (!list_empty(&en->list)) { - IP_VS_ERR("ip_vs_lblc_hash(): request for already hashed, " - "called from %p\n", __builtin_return_address(0)); - return 0; - } + unsigned hash = ip_vs_lblc_hashkey(en->addr); - /* - * Hash by destination IP address - */ - hash = ip_vs_lblc_hashkey(en->addr); - - write_lock(&tbl->lock); list_add(&en->list, &tbl->bucket[hash]); atomic_inc(&tbl->entries); - write_unlock(&tbl->lock); - - return 1; } /* - * Get ip_vs_lblc_entry associated with supplied parameters. + * Get ip_vs_lblc_entry associated with supplied parameters. Called under read + * lock */ static inline struct ip_vs_lblc_entry * ip_vs_lblc_get(struct ip_vs_lblc_table *tbl, __be32 addr) { - unsigned hash; + unsigned hash = ip_vs_lblc_hashkey(addr); struct ip_vs_lblc_entry *en; - hash = ip_vs_lblc_hashkey(addr); + list_for_each_entry(en, &tbl->bucket[hash], list) + if (en->addr == addr) + return en; - read_lock(&tbl->lock); + return NULL; +} - list_for_each_entry(en, &tbl->bucket[hash], list) { - if (en->addr == addr) { - /* HIT */ - read_unlock(&tbl->lock); - return en; + +/* + * Create or update an ip_vs_lblc_entry, which is a mapping of a destination IP + * address to a server. Called under write lock. + */ +static inline struct ip_vs_lblc_entry * +ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, __be32 daddr, + struct ip_vs_dest *dest) +{ + struct ip_vs_lblc_entry *en; + + en = ip_vs_lblc_get(tbl, daddr); + if (!en) { + en = kmalloc(sizeof(*en), GFP_ATOMIC); + if (!en) { + IP_VS_ERR("ip_vs_lblc_new(): no memory\n"); + return NULL; } - } - read_unlock(&tbl->lock); + en->addr = daddr; + en->lastuse = jiffies; - return NULL; + atomic_inc(&dest->refcnt); + en->dest = dest; + + ip_vs_lblc_hash(tbl, en); + } else if (en->dest != dest) { + atomic_dec(&en->dest->refcnt); + atomic_inc(&dest->refcnt); + en->dest = dest; + } + + return en; } @@ -230,30 +215,29 @@ ip_vs_lblc_get(struct ip_vs_lblc_table *tbl, __be32 addr) */ static void ip_vs_lblc_flush(struct ip_vs_lblc_table *tbl) { - int i; struct ip_vs_lblc_entry *en, *nxt; + int i; for (i=0; ilock); list_for_each_entry_safe(en, nxt, &tbl->bucket[i], list) { ip_vs_lblc_free(en); atomic_dec(&tbl->entries); } - write_unlock(&tbl->lock); } } -static inline void ip_vs_lblc_full_check(struct ip_vs_lblc_table *tbl) +static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc) { + struct ip_vs_lblc_table *tbl = svc->sched_data; + struct ip_vs_lblc_entry *en, *nxt; unsigned long now = jiffies; int i, j; - struct ip_vs_lblc_entry *en, *nxt; for (i=0, j=tbl->rover; ilock); + write_lock(&svc->sched_lock); list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { if (time_before(now, en->lastuse + sysctl_ip_vs_lblc_expiration)) @@ -262,7 +246,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_lblc_table *tbl) ip_vs_lblc_free(en); atomic_dec(&tbl->entries); } - write_unlock(&tbl->lock); + write_unlock(&svc->sched_lock); } tbl->rover = j; } @@ -281,17 +265,16 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_lblc_table *tbl) */ static void ip_vs_lblc_check_expire(unsigned long data) { - struct ip_vs_lblc_table *tbl; + struct ip_vs_service *svc = (struct ip_vs_service *) data; + struct ip_vs_lblc_table *tbl = svc->sched_data; unsigned long now = jiffies; int goal; int i, j; struct ip_vs_lblc_entry *en, *nxt; - tbl = (struct ip_vs_lblc_table *)data; - if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) { /* do full expiration check */ - ip_vs_lblc_full_check(tbl); + ip_vs_lblc_full_check(svc); tbl->counter = 1; goto out; } @@ -308,7 +291,7 @@ static void ip_vs_lblc_check_expire(unsigned long data) for (i=0, j=tbl->rover; ilock); + write_lock(&svc->sched_lock); list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { if (time_before(now, en->lastuse + ENTRY_TIMEOUT)) continue; @@ -317,7 +300,7 @@ static void ip_vs_lblc_check_expire(unsigned long data) atomic_dec(&tbl->entries); goal--; } - write_unlock(&tbl->lock); + write_unlock(&svc->sched_lock); if (goal <= 0) break; } @@ -336,15 +319,14 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc) /* * Allocate the ip_vs_lblc_table for this service */ - tbl = kmalloc(sizeof(struct ip_vs_lblc_table), GFP_ATOMIC); + tbl = kmalloc(sizeof(*tbl), GFP_ATOMIC); if (tbl == NULL) { IP_VS_ERR("ip_vs_lblc_init_svc(): no memory\n"); return -ENOMEM; } svc->sched_data = tbl; IP_VS_DBG(6, "LBLC hash table (memory=%Zdbytes) allocated for " - "current service\n", - sizeof(struct ip_vs_lblc_table)); + "current service\n", sizeof(*tbl)); /* * Initialize the hash buckets @@ -352,7 +334,6 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc) for (i=0; ibucket[i]); } - rwlock_init(&tbl->lock); tbl->max_size = IP_VS_LBLC_TAB_SIZE*16; tbl->rover = 0; tbl->counter = 1; @@ -361,9 +342,8 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc) * Hook periodic timer for garbage collection */ setup_timer(&tbl->periodic_timer, ip_vs_lblc_check_expire, - (unsigned long)tbl); - tbl->periodic_timer.expires = jiffies+CHECK_EXPIRE_INTERVAL; - add_timer(&tbl->periodic_timer); + (unsigned long)svc); + mod_timer(&tbl->periodic_timer, jiffies + CHECK_EXPIRE_INTERVAL); return 0; } @@ -380,9 +360,9 @@ static int ip_vs_lblc_done_svc(struct ip_vs_service *svc) ip_vs_lblc_flush(tbl); /* release the table itself */ - kfree(svc->sched_data); + kfree(tbl); IP_VS_DBG(6, "LBLC hash table (memory=%Zdbytes) released\n", - sizeof(struct ip_vs_lblc_table)); + sizeof(*tbl)); return 0; } @@ -478,46 +458,54 @@ is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc) static struct ip_vs_dest * ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) { - struct ip_vs_dest *dest; - struct ip_vs_lblc_table *tbl; - struct ip_vs_lblc_entry *en; + struct ip_vs_lblc_table *tbl = svc->sched_data; struct iphdr *iph = ip_hdr(skb); + struct ip_vs_dest *dest = NULL; + struct ip_vs_lblc_entry *en; IP_VS_DBG(6, "ip_vs_lblc_schedule(): Scheduling...\n"); - tbl = (struct ip_vs_lblc_table *)svc->sched_data; + /* First look in our cache */ + read_lock(&svc->sched_lock); en = ip_vs_lblc_get(tbl, iph->daddr); - if (en == NULL) { - dest = __ip_vs_lblc_schedule(svc, iph); - if (dest == NULL) { - IP_VS_DBG(1, "no destination available\n"); - return NULL; - } - en = ip_vs_lblc_new(iph->daddr, dest); - if (en == NULL) { - return NULL; - } - ip_vs_lblc_hash(tbl, en); - } else { - dest = en->dest; - if (!(dest->flags & IP_VS_DEST_F_AVAILABLE) - || atomic_read(&dest->weight) <= 0 - || is_overloaded(dest, svc)) { - dest = __ip_vs_lblc_schedule(svc, iph); - if (dest == NULL) { - IP_VS_DBG(1, "no destination available\n"); - return NULL; - } - atomic_dec(&en->dest->refcnt); - atomic_inc(&dest->refcnt); - en->dest = dest; - } + if (en) { + /* We only hold a read lock, but this is atomic */ + en->lastuse = jiffies; + + /* + * If the destination is not available, i.e. it's in the trash, + * we must ignore it, as it may be removed from under our feet, + * if someone drops our reference count. Our caller only makes + * sure that destinations, that are not in the trash, are not + * moved to the trash, while we are scheduling. But anyone can + * free up entries from the trash at any time. + */ + + if (en->dest->flags & IP_VS_DEST_F_AVAILABLE) + dest = en->dest; + } + read_unlock(&svc->sched_lock); + + /* If the destination has a weight and is not overloaded, use it */ + if (dest && atomic_read(&dest->weight) > 0 && !is_overloaded(dest, svc)) + goto out; + + /* No cache entry or it is invalid, time to schedule */ + dest = __ip_vs_lblc_schedule(svc, iph); + if (!dest) { + IP_VS_DBG(1, "no destination available\n"); + return NULL; } - en->lastuse = jiffies; + /* If we fail to create a cache entry, we'll just use the valid dest */ + write_lock(&svc->sched_lock); + ip_vs_lblc_new(tbl, iph->daddr, dest); + write_unlock(&svc->sched_lock); + +out: IP_VS_DBG(6, "LBLC: destination IP address %u.%u.%u.%u " "--> server %u.%u.%u.%u:%d\n", - NIPQUAD(en->addr), + NIPQUAD(iph->daddr), NIPQUAD(dest->addr), ntohs(dest->port)); -- cgit v0.10.2 From f728bafb5698076dd35bca35ee6cfe52ea1b8ab2 Mon Sep 17 00:00:00 2001 From: Sven Wegener Date: Tue, 19 Aug 2008 08:16:19 +0200 Subject: ipvs: Fix race conditions in lblcr scheduler We can't access the cache entry outside of our critical read-locked region, because someone may free that entry. Also getting an entry under read lock, then locking for write and trying to delete that entry looks fishy, but should be no problem here, because we're only comparing a pointer. Also there is no need for our own rwlock, there is already one in the service structure for use in the schedulers. Signed-off-by: Sven Wegener Signed-off-by: Simon Horman diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c index f1c8450..375a1ff 100644 --- a/net/ipv4/ipvs/ip_vs_lblcr.c +++ b/net/ipv4/ipvs/ip_vs_lblcr.c @@ -106,7 +106,7 @@ ip_vs_dest_set_insert(struct ip_vs_dest_set *set, struct ip_vs_dest *dest) return NULL; } - e = kmalloc(sizeof(struct ip_vs_dest_list), GFP_ATOMIC); + e = kmalloc(sizeof(*e), GFP_ATOMIC); if (e == NULL) { IP_VS_ERR("ip_vs_dest_set_insert(): no memory\n"); return NULL; @@ -116,11 +116,9 @@ ip_vs_dest_set_insert(struct ip_vs_dest_set *set, struct ip_vs_dest *dest) e->dest = dest; /* link it to the list */ - write_lock(&set->lock); e->next = set->list; set->list = e; atomic_inc(&set->size); - write_unlock(&set->lock); set->lastmod = jiffies; return e; @@ -131,7 +129,6 @@ ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest) { struct ip_vs_dest_list *e, **ep; - write_lock(&set->lock); for (ep=&set->list, e=*ep; e!=NULL; e=*ep) { if (e->dest == dest) { /* HIT */ @@ -144,7 +141,6 @@ ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest) } ep = &e->next; } - write_unlock(&set->lock); } static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set) @@ -174,7 +170,6 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set) if (set == NULL) return NULL; - read_lock(&set->lock); /* select the first destination server, whose weight > 0 */ for (e=set->list; e!=NULL; e=e->next) { least = e->dest; @@ -188,7 +183,6 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set) goto nextstage; } } - read_unlock(&set->lock); return NULL; /* find the destination with the weighted least load */ @@ -207,7 +201,6 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set) loh = doh; } } - read_unlock(&set->lock); IP_VS_DBG(6, "ip_vs_dest_set_min: server %d.%d.%d.%d:%d " "activeconns %d refcnt %d weight %d overhead %d\n", @@ -229,7 +222,6 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set) if (set == NULL) return NULL; - read_lock(&set->lock); /* select the first destination server, whose weight > 0 */ for (e=set->list; e!=NULL; e=e->next) { most = e->dest; @@ -239,7 +231,6 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set) goto nextstage; } } - read_unlock(&set->lock); return NULL; /* find the destination with the weighted most load */ @@ -256,7 +247,6 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set) moh = doh; } } - read_unlock(&set->lock); IP_VS_DBG(6, "ip_vs_dest_set_max: server %d.%d.%d.%d:%d " "activeconns %d refcnt %d weight %d overhead %d\n", @@ -284,7 +274,6 @@ struct ip_vs_lblcr_entry { * IPVS lblcr hash table */ struct ip_vs_lblcr_table { - rwlock_t lock; /* lock for this table */ struct list_head bucket[IP_VS_LBLCR_TAB_SIZE]; /* hash bucket */ atomic_t entries; /* number of entries */ int max_size; /* maximum size of entries */ @@ -311,32 +300,6 @@ static ctl_table vs_vars_table[] = { static struct ctl_table_header * sysctl_header; -/* - * new/free a ip_vs_lblcr_entry, which is a mapping of a destination - * IP address to a server. - */ -static inline struct ip_vs_lblcr_entry *ip_vs_lblcr_new(__be32 daddr) -{ - struct ip_vs_lblcr_entry *en; - - en = kmalloc(sizeof(struct ip_vs_lblcr_entry), GFP_ATOMIC); - if (en == NULL) { - IP_VS_ERR("ip_vs_lblcr_new(): no memory\n"); - return NULL; - } - - INIT_LIST_HEAD(&en->list); - en->addr = daddr; - - /* initilize its dest set */ - atomic_set(&(en->set.size), 0); - en->set.list = NULL; - rwlock_init(&en->set.lock); - - return en; -} - - static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en) { list_del(&en->list); @@ -358,55 +321,68 @@ static inline unsigned ip_vs_lblcr_hashkey(__be32 addr) * Hash an entry in the ip_vs_lblcr_table. * returns bool success. */ -static int +static void ip_vs_lblcr_hash(struct ip_vs_lblcr_table *tbl, struct ip_vs_lblcr_entry *en) { - unsigned hash; - - if (!list_empty(&en->list)) { - IP_VS_ERR("ip_vs_lblcr_hash(): request for already hashed, " - "called from %p\n", __builtin_return_address(0)); - return 0; - } + unsigned hash = ip_vs_lblcr_hashkey(en->addr); - /* - * Hash by destination IP address - */ - hash = ip_vs_lblcr_hashkey(en->addr); - - write_lock(&tbl->lock); list_add(&en->list, &tbl->bucket[hash]); atomic_inc(&tbl->entries); - write_unlock(&tbl->lock); - - return 1; } /* - * Get ip_vs_lblcr_entry associated with supplied parameters. + * Get ip_vs_lblcr_entry associated with supplied parameters. Called under + * read lock. */ static inline struct ip_vs_lblcr_entry * ip_vs_lblcr_get(struct ip_vs_lblcr_table *tbl, __be32 addr) { - unsigned hash; + unsigned hash = ip_vs_lblcr_hashkey(addr); struct ip_vs_lblcr_entry *en; - hash = ip_vs_lblcr_hashkey(addr); + list_for_each_entry(en, &tbl->bucket[hash], list) + if (en->addr == addr) + return en; + + return NULL; +} - read_lock(&tbl->lock); - list_for_each_entry(en, &tbl->bucket[hash], list) { - if (en->addr == addr) { - /* HIT */ - read_unlock(&tbl->lock); - return en; +/* + * Create or update an ip_vs_lblcr_entry, which is a mapping of a destination + * IP address to a server. Called under write lock. + */ +static inline struct ip_vs_lblcr_entry * +ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, __be32 daddr, + struct ip_vs_dest *dest) +{ + struct ip_vs_lblcr_entry *en; + + en = ip_vs_lblcr_get(tbl, daddr); + if (!en) { + en = kmalloc(sizeof(*en), GFP_ATOMIC); + if (!en) { + IP_VS_ERR("ip_vs_lblcr_new(): no memory\n"); + return NULL; } + + en->addr = daddr; + en->lastuse = jiffies; + + /* initilize its dest set */ + atomic_set(&(en->set.size), 0); + en->set.list = NULL; + rwlock_init(&en->set.lock); + + ip_vs_lblcr_hash(tbl, en); } - read_unlock(&tbl->lock); + write_lock(&en->set.lock); + ip_vs_dest_set_insert(&en->set, dest); + write_unlock(&en->set.lock); - return NULL; + return en; } @@ -418,19 +394,18 @@ static void ip_vs_lblcr_flush(struct ip_vs_lblcr_table *tbl) int i; struct ip_vs_lblcr_entry *en, *nxt; + /* No locking required, only called during cleanup. */ for (i=0; ilock); list_for_each_entry_safe(en, nxt, &tbl->bucket[i], list) { ip_vs_lblcr_free(en); - atomic_dec(&tbl->entries); } - write_unlock(&tbl->lock); } } -static inline void ip_vs_lblcr_full_check(struct ip_vs_lblcr_table *tbl) +static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc) { + struct ip_vs_lblcr_table *tbl = svc->sched_data; unsigned long now = jiffies; int i, j; struct ip_vs_lblcr_entry *en, *nxt; @@ -438,7 +413,7 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_lblcr_table *tbl) for (i=0, j=tbl->rover; ilock); + write_lock(&svc->sched_lock); list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { if (time_after(en->lastuse+sysctl_ip_vs_lblcr_expiration, now)) @@ -447,7 +422,7 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_lblcr_table *tbl) ip_vs_lblcr_free(en); atomic_dec(&tbl->entries); } - write_unlock(&tbl->lock); + write_unlock(&svc->sched_lock); } tbl->rover = j; } @@ -466,17 +441,16 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_lblcr_table *tbl) */ static void ip_vs_lblcr_check_expire(unsigned long data) { - struct ip_vs_lblcr_table *tbl; + struct ip_vs_service *svc = (struct ip_vs_service *) data; + struct ip_vs_lblcr_table *tbl = svc->sched_data; unsigned long now = jiffies; int goal; int i, j; struct ip_vs_lblcr_entry *en, *nxt; - tbl = (struct ip_vs_lblcr_table *)data; - if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) { /* do full expiration check */ - ip_vs_lblcr_full_check(tbl); + ip_vs_lblcr_full_check(svc); tbl->counter = 1; goto out; } @@ -493,7 +467,7 @@ static void ip_vs_lblcr_check_expire(unsigned long data) for (i=0, j=tbl->rover; ilock); + write_lock(&svc->sched_lock); list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { if (time_before(now, en->lastuse+ENTRY_TIMEOUT)) continue; @@ -502,7 +476,7 @@ static void ip_vs_lblcr_check_expire(unsigned long data) atomic_dec(&tbl->entries); goal--; } - write_unlock(&tbl->lock); + write_unlock(&svc->sched_lock); if (goal <= 0) break; } @@ -520,15 +494,14 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc) /* * Allocate the ip_vs_lblcr_table for this service */ - tbl = kmalloc(sizeof(struct ip_vs_lblcr_table), GFP_ATOMIC); + tbl = kmalloc(sizeof(*tbl), GFP_ATOMIC); if (tbl == NULL) { IP_VS_ERR("ip_vs_lblcr_init_svc(): no memory\n"); return -ENOMEM; } svc->sched_data = tbl; IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) allocated for " - "current service\n", - sizeof(struct ip_vs_lblcr_table)); + "current service\n", sizeof(*tbl)); /* * Initialize the hash buckets @@ -536,7 +509,6 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc) for (i=0; ibucket[i]); } - rwlock_init(&tbl->lock); tbl->max_size = IP_VS_LBLCR_TAB_SIZE*16; tbl->rover = 0; tbl->counter = 1; @@ -545,9 +517,8 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc) * Hook periodic timer for garbage collection */ setup_timer(&tbl->periodic_timer, ip_vs_lblcr_check_expire, - (unsigned long)tbl); - tbl->periodic_timer.expires = jiffies+CHECK_EXPIRE_INTERVAL; - add_timer(&tbl->periodic_timer); + (unsigned long)svc); + mod_timer(&tbl->periodic_timer, jiffies + CHECK_EXPIRE_INTERVAL); return 0; } @@ -564,9 +535,9 @@ static int ip_vs_lblcr_done_svc(struct ip_vs_service *svc) ip_vs_lblcr_flush(tbl); /* release the table itself */ - kfree(svc->sched_data); + kfree(tbl); IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) released\n", - sizeof(struct ip_vs_lblcr_table)); + sizeof(*tbl)); return 0; } @@ -663,50 +634,78 @@ is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc) static struct ip_vs_dest * ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) { - struct ip_vs_dest *dest; - struct ip_vs_lblcr_table *tbl; - struct ip_vs_lblcr_entry *en; + struct ip_vs_lblcr_table *tbl = svc->sched_data; struct iphdr *iph = ip_hdr(skb); + struct ip_vs_dest *dest = NULL; + struct ip_vs_lblcr_entry *en; IP_VS_DBG(6, "ip_vs_lblcr_schedule(): Scheduling...\n"); - tbl = (struct ip_vs_lblcr_table *)svc->sched_data; + /* First look in our cache */ + read_lock(&svc->sched_lock); en = ip_vs_lblcr_get(tbl, iph->daddr); - if (en == NULL) { - dest = __ip_vs_lblcr_schedule(svc, iph); - if (dest == NULL) { - IP_VS_DBG(1, "no destination available\n"); - return NULL; - } - en = ip_vs_lblcr_new(iph->daddr); - if (en == NULL) { - return NULL; - } - ip_vs_dest_set_insert(&en->set, dest); - ip_vs_lblcr_hash(tbl, en); - } else { + if (en) { + /* We only hold a read lock, but this is atomic */ + en->lastuse = jiffies; + + /* Get the least loaded destination */ + read_lock(&en->set.lock); dest = ip_vs_dest_set_min(&en->set); - if (!dest || is_overloaded(dest, svc)) { - dest = __ip_vs_lblcr_schedule(svc, iph); - if (dest == NULL) { - IP_VS_DBG(1, "no destination available\n"); - return NULL; - } - ip_vs_dest_set_insert(&en->set, dest); - } + read_unlock(&en->set.lock); + + /* More than one destination + enough time passed by, cleanup */ if (atomic_read(&en->set.size) > 1 && - jiffies-en->set.lastmod > sysctl_ip_vs_lblcr_expiration) { + time_after(jiffies, en->set.lastmod + + sysctl_ip_vs_lblcr_expiration)) { struct ip_vs_dest *m; + + write_lock(&en->set.lock); m = ip_vs_dest_set_max(&en->set); if (m) ip_vs_dest_set_erase(&en->set, m); + write_unlock(&en->set.lock); + } + + /* If the destination is not overloaded, use it */ + if (dest && !is_overloaded(dest, svc)) { + read_unlock(&svc->sched_lock); + goto out; + } + + /* The cache entry is invalid, time to schedule */ + dest = __ip_vs_lblcr_schedule(svc, iph); + if (!dest) { + IP_VS_DBG(1, "no destination available\n"); + read_unlock(&svc->sched_lock); + return NULL; } + + /* Update our cache entry */ + write_lock(&en->set.lock); + ip_vs_dest_set_insert(&en->set, dest); + write_unlock(&en->set.lock); + } + read_unlock(&svc->sched_lock); + + if (dest) + goto out; + + /* No cache entry, time to schedule */ + dest = __ip_vs_lblcr_schedule(svc, iph); + if (!dest) { + IP_VS_DBG(1, "no destination available\n"); + return NULL; } - en->lastuse = jiffies; + /* If we fail to create a cache entry, we'll just use the valid dest */ + write_lock(&svc->sched_lock); + ip_vs_lblcr_new(tbl, iph->daddr, dest); + write_unlock(&svc->sched_lock); + +out: IP_VS_DBG(6, "LBLCR: destination IP address %u.%u.%u.%u " "--> server %u.%u.%u.%u:%d\n", - NIPQUAD(en->addr), + NIPQUAD(iph->daddr), NIPQUAD(dest->addr), ntohs(dest->port)); -- cgit v0.10.2 From ad67ef6848a1608b0430003e11e7af1ce706e341 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Tue, 19 Aug 2008 11:08:40 +0300 Subject: ARM: OMAP2: Powerdomain: Add base OMAP2/3 powerdomain code This patch creates an interface to the powerdomain registers in the PRM/CM modules on OMAP2/3. This interface is intended to be used by PM code, e.g., pm.c; not by device drivers directly. Each powerdomain will be defined in later patches as static structures. Also defined are dependencies between powerdomains, used for adding and removing PM_WKDEP and CM_SLEEPDEP bits. The powerdomain structures are linked into a list at boot by pwrdm_register(), similar to the OMAP clock code. The patch adds a Kconfig option, CONFIG_OMAP_DEBUG_POWERDOMAIN, which when enabled will emit verbose debug messages via pr_debug(). Signed-off-by: Paul Walmsley Signed-off-by: Tony Lindgren diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile index 93ee990..1001d42 100644 --- a/arch/arm/mach-omap2/Makefile +++ b/arch/arm/mach-omap2/Makefile @@ -4,7 +4,7 @@ # Common support obj-y := irq.o id.o io.o memory.o control.o prcm.o clock.o mux.o \ - devices.o serial.o gpmc.o timer-gp.o + devices.o serial.o gpmc.o timer-gp.o powerdomain.o obj-$(CONFIG_OMAP_MCBSP) += mcbsp.o diff --git a/arch/arm/mach-omap2/clock34xx.c b/arch/arm/mach-omap2/clock34xx.c index 3ff7495..dff7a72f 100644 --- a/arch/arm/mach-omap2/clock34xx.c +++ b/arch/arm/mach-omap2/clock34xx.c @@ -62,11 +62,14 @@ static void omap3_dpll_recalc(struct clk *clk) static void _omap3_dpll_write_clken(struct clk *clk, u8 clken_bits) { const struct dpll_data *dd; + u32 v; dd = clk->dpll_data; - cm_rmw_reg_bits(dd->enable_mask, clken_bits << __ffs(dd->enable_mask), - dd->control_reg); + v = __raw_readl(dd->control_reg); + v &= ~dd->enable_mask; + v |= clken_bits << __ffs(dd->enable_mask); + __raw_writel(v, dd->control_reg); } /* _omap3_wait_dpll_status: wait for a DPLL to enter a specific state */ @@ -82,7 +85,7 @@ static int _omap3_wait_dpll_status(struct clk *clk, u8 state) state <<= dd->idlest_bit; idlest_mask = 1 << dd->idlest_bit; - while (((cm_read_reg(dd->idlest_reg) & idlest_mask) != state) && + while (((__raw_readl(dd->idlest_reg) & idlest_mask) != state) && i < MAX_DPLL_WAIT_TRIES) { i++; udelay(1); @@ -285,7 +288,7 @@ static u32 omap3_dpll_autoidle_read(struct clk *clk) dd = clk->dpll_data; - v = cm_read_reg(dd->autoidle_reg); + v = __raw_readl(dd->autoidle_reg); v &= dd->autoidle_mask; v >>= __ffs(dd->autoidle_mask); @@ -304,6 +307,7 @@ static u32 omap3_dpll_autoidle_read(struct clk *clk) static void omap3_dpll_allow_idle(struct clk *clk) { const struct dpll_data *dd; + u32 v; if (!clk || !clk->dpll_data) return; @@ -315,9 +319,10 @@ static void omap3_dpll_allow_idle(struct clk *clk) * by writing 0x5 instead of 0x1. Add some mechanism to * optionally enter this mode. */ - cm_rmw_reg_bits(dd->autoidle_mask, - DPLL_AUTOIDLE_LOW_POWER_STOP << __ffs(dd->autoidle_mask), - dd->autoidle_reg); + v = __raw_readl(dd->autoidle_reg); + v &= ~dd->autoidle_mask; + v |= DPLL_AUTOIDLE_LOW_POWER_STOP << __ffs(dd->autoidle_mask); + __raw_writel(v, dd->autoidle_reg); } /** @@ -329,15 +334,17 @@ static void omap3_dpll_allow_idle(struct clk *clk) static void omap3_dpll_deny_idle(struct clk *clk) { const struct dpll_data *dd; + u32 v; if (!clk || !clk->dpll_data) return; dd = clk->dpll_data; - cm_rmw_reg_bits(dd->autoidle_mask, - DPLL_AUTOIDLE_DISABLE << __ffs(dd->autoidle_mask), - dd->autoidle_reg); + v = __raw_readl(dd->autoidle_reg); + v &= ~dd->autoidle_mask; + v |= DPLL_AUTOIDLE_DISABLE << __ffs(dd->autoidle_mask); + __raw_writel(v, dd->autoidle_reg); } /* Clock control for DPLL outputs */ diff --git a/arch/arm/mach-omap2/powerdomain.c b/arch/arm/mach-omap2/powerdomain.c new file mode 100644 index 0000000..1ec0038 --- /dev/null +++ b/arch/arm/mach-omap2/powerdomain.c @@ -0,0 +1,909 @@ +/* + * OMAP powerdomain control + * + * Copyright (C) 2007-2008 Texas Instruments, Inc. + * Copyright (C) 2007-2008 Nokia Corporation + * + * Written by Paul Walmsley + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifdef CONFIG_OMAP_DEBUG_POWERDOMAIN +# define DEBUG +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "cm.h" +#include "cm-regbits-34xx.h" +#include "prm.h" +#include "prm-regbits-34xx.h" + +#include +#include + +/* pwrdm_list contains all registered struct powerdomains */ +static LIST_HEAD(pwrdm_list); + +/* + * pwrdm_rwlock protects pwrdm_list add and del ops - also reused to + * protect pwrdm_clkdms[] during clkdm add/del ops + */ +static DEFINE_RWLOCK(pwrdm_rwlock); + + +/* Private functions */ + +static u32 prm_read_mod_bits_shift(s16 domain, s16 idx, u32 mask) +{ + u32 v; + + v = prm_read_mod_reg(domain, idx); + v &= mask; + v >>= __ffs(mask); + + return v; +} + +static struct powerdomain *_pwrdm_lookup(const char *name) +{ + struct powerdomain *pwrdm, *temp_pwrdm; + + pwrdm = NULL; + + list_for_each_entry(temp_pwrdm, &pwrdm_list, node) { + if (!strcmp(name, temp_pwrdm->name)) { + pwrdm = temp_pwrdm; + break; + } + } + + return pwrdm; +} + +/* _pwrdm_deps_lookup - look up the specified powerdomain in a pwrdm list */ +static struct powerdomain *_pwrdm_deps_lookup(struct powerdomain *pwrdm, + struct pwrdm_dep *deps) +{ + struct pwrdm_dep *pd; + + if (!pwrdm || !deps || !omap_chip_is(pwrdm->omap_chip)) + return ERR_PTR(-EINVAL); + + for (pd = deps; pd; pd++) { + + if (!omap_chip_is(pd->omap_chip)) + continue; + + if (!pd->pwrdm && pd->pwrdm_name) + pd->pwrdm = pwrdm_lookup(pd->pwrdm_name); + + if (pd->pwrdm == pwrdm) + break; + + } + + if (!pd) + return ERR_PTR(-ENOENT); + + return pd->pwrdm; +} + + +/* Public functions */ + +/** + * pwrdm_init - set up the powerdomain layer + * + * Loop through the list of powerdomains, registering all that are + * available on the current CPU. If pwrdm_list is supplied and not + * null, all of the referenced powerdomains will be registered. No + * return value. + */ +void pwrdm_init(struct powerdomain **pwrdm_list) +{ + struct powerdomain **p = NULL; + + if (pwrdm_list) + for (p = pwrdm_list; *p; p++) + pwrdm_register(*p); +} + +/** + * pwrdm_register - register a powerdomain + * @pwrdm: struct powerdomain * to register + * + * Adds a powerdomain to the internal powerdomain list. Returns + * -EINVAL if given a null pointer, -EEXIST if a powerdomain is + * already registered by the provided name, or 0 upon success. + */ +int pwrdm_register(struct powerdomain *pwrdm) +{ + unsigned long flags; + int ret = -EINVAL; + + if (!pwrdm) + return -EINVAL; + + if (!omap_chip_is(pwrdm->omap_chip)) + return -EINVAL; + + write_lock_irqsave(&pwrdm_rwlock, flags); + if (_pwrdm_lookup(pwrdm->name)) { + ret = -EEXIST; + goto pr_unlock; + } + + list_add(&pwrdm->node, &pwrdm_list); + + pr_debug("powerdomain: registered %s\n", pwrdm->name); + ret = 0; + +pr_unlock: + write_unlock_irqrestore(&pwrdm_rwlock, flags); + + return ret; +} + +/** + * pwrdm_unregister - unregister a powerdomain + * @pwrdm: struct powerdomain * to unregister + * + * Removes a powerdomain from the internal powerdomain list. Returns + * -EINVAL if pwrdm argument is NULL. + */ +int pwrdm_unregister(struct powerdomain *pwrdm) +{ + unsigned long flags; + + if (!pwrdm) + return -EINVAL; + + write_lock_irqsave(&pwrdm_rwlock, flags); + list_del(&pwrdm->node); + write_unlock_irqrestore(&pwrdm_rwlock, flags); + + pr_debug("powerdomain: unregistered %s\n", pwrdm->name); + + return 0; +} + +/** + * pwrdm_lookup - look up a powerdomain by name, return a pointer + * @name: name of powerdomain + * + * Find a registered powerdomain by its name. Returns a pointer to the + * struct powerdomain if found, or NULL otherwise. + */ +struct powerdomain *pwrdm_lookup(const char *name) +{ + struct powerdomain *pwrdm; + unsigned long flags; + + if (!name) + return NULL; + + read_lock_irqsave(&pwrdm_rwlock, flags); + pwrdm = _pwrdm_lookup(name); + read_unlock_irqrestore(&pwrdm_rwlock, flags); + + return pwrdm; +} + +/** + * pwrdm_for_each - call function on each registered clockdomain + * @fn: callback function * + * + * Call the supplied function for each registered powerdomain. The + * callback function can return anything but 0 to bail out early from + * the iterator. The callback function is called with the pwrdm_rwlock + * held for reading, so no powerdomain structure manipulation + * functions should be called from the callback, although hardware + * powerdomain control functions are fine. Returns the last return + * value of the callback function, which should be 0 for success or + * anything else to indicate failure; or -EINVAL if the function + * pointer is null. + */ +int pwrdm_for_each(int (*fn)(struct powerdomain *pwrdm)) +{ + struct powerdomain *temp_pwrdm; + unsigned long flags; + int ret = 0; + + if (!fn) + return -EINVAL; + + read_lock_irqsave(&pwrdm_rwlock, flags); + list_for_each_entry(temp_pwrdm, &pwrdm_list, node) { + ret = (*fn)(temp_pwrdm); + if (ret) + break; + } + read_unlock_irqrestore(&pwrdm_rwlock, flags); + + return ret; +} + +/** + * pwrdm_add_wkdep - add a wakeup dependency from pwrdm2 to pwrdm1 + * @pwrdm1: wake this struct powerdomain * up (dependent) + * @pwrdm2: when this struct powerdomain * wakes up (source) + * + * When the powerdomain represented by pwrdm2 wakes up (due to an + * interrupt), wake up pwrdm1. Implemented in hardware on the OMAP, + * this feature is designed to reduce wakeup latency of the dependent + * powerdomain. Returns -EINVAL if presented with invalid powerdomain + * pointers, -ENOENT if pwrdm2 cannot wake up pwrdm1 in hardware, or + * 0 upon success. + */ +int pwrdm_add_wkdep(struct powerdomain *pwrdm1, struct powerdomain *pwrdm2) +{ + struct powerdomain *p; + + if (!pwrdm1) + return -EINVAL; + + p = _pwrdm_deps_lookup(pwrdm2, pwrdm1->wkdep_srcs); + if (IS_ERR(p)) { + pr_debug("powerdomain: hardware cannot set/clear wake up of " + "%s when %s wakes up\n", pwrdm1->name, pwrdm2->name); + return IS_ERR(p); + } + + pr_debug("powerdomain: hardware will wake up %s when %s wakes up\n", + pwrdm1->name, pwrdm2->name); + + prm_set_mod_reg_bits((1 << pwrdm2->dep_bit), + pwrdm1->prcm_offs, PM_WKDEP); + + return 0; +} + +/** + * pwrdm_del_wkdep - remove a wakeup dependency from pwrdm2 to pwrdm1 + * @pwrdm1: wake this struct powerdomain * up (dependent) + * @pwrdm2: when this struct powerdomain * wakes up (source) + * + * Remove a wakeup dependency that causes pwrdm1 to wake up when pwrdm2 + * wakes up. Returns -EINVAL if presented with invalid powerdomain + * pointers, -ENOENT if pwrdm2 cannot wake up pwrdm1 in hardware, or + * 0 upon success. + */ +int pwrdm_del_wkdep(struct powerdomain *pwrdm1, struct powerdomain *pwrdm2) +{ + struct powerdomain *p; + + if (!pwrdm1) + return -EINVAL; + + p = _pwrdm_deps_lookup(pwrdm2, pwrdm1->wkdep_srcs); + if (IS_ERR(p)) { + pr_debug("powerdomain: hardware cannot set/clear wake up of " + "%s when %s wakes up\n", pwrdm1->name, pwrdm2->name); + return IS_ERR(p); + } + + pr_debug("powerdomain: hardware will no longer wake up %s after %s " + "wakes up\n", pwrdm1->name, pwrdm2->name); + + prm_clear_mod_reg_bits((1 << pwrdm2->dep_bit), + pwrdm1->prcm_offs, PM_WKDEP); + + return 0; +} + +/** + * pwrdm_read_wkdep - read wakeup dependency state from pwrdm2 to pwrdm1 + * @pwrdm1: wake this struct powerdomain * up (dependent) + * @pwrdm2: when this struct powerdomain * wakes up (source) + * + * Return 1 if a hardware wakeup dependency exists wherein pwrdm1 will be + * awoken when pwrdm2 wakes up; 0 if dependency is not set; -EINVAL + * if either powerdomain pointer is invalid; or -ENOENT if the hardware + * is incapable. + * + * REVISIT: Currently this function only represents software-controllable + * wakeup dependencies. Wakeup dependencies fixed in hardware are not + * yet handled here. + */ +int pwrdm_read_wkdep(struct powerdomain *pwrdm1, struct powerdomain *pwrdm2) +{ + struct powerdomain *p; + + if (!pwrdm1) + return -EINVAL; + + p = _pwrdm_deps_lookup(pwrdm2, pwrdm1->wkdep_srcs); + if (IS_ERR(p)) { + pr_debug("powerdomain: hardware cannot set/clear wake up of " + "%s when %s wakes up\n", pwrdm1->name, pwrdm2->name); + return IS_ERR(p); + } + + return prm_read_mod_bits_shift(pwrdm1->prcm_offs, PM_WKDEP, + (1 << pwrdm2->dep_bit)); +} + +/** + * pwrdm_add_sleepdep - add a sleep dependency from pwrdm2 to pwrdm1 + * @pwrdm1: prevent this struct powerdomain * from sleeping (dependent) + * @pwrdm2: when this struct powerdomain * is active (source) + * + * Prevent pwrdm1 from automatically going inactive (and then to + * retention or off) if pwrdm2 is still active. Returns -EINVAL if + * presented with invalid powerdomain pointers or called on a machine + * that does not support software-configurable hardware sleep dependencies, + * -ENOENT if the specified dependency cannot be set in hardware, or + * 0 upon success. + */ +int pwrdm_add_sleepdep(struct powerdomain *pwrdm1, struct powerdomain *pwrdm2) +{ + struct powerdomain *p; + + if (!pwrdm1) + return -EINVAL; + + if (!cpu_is_omap34xx()) + return -EINVAL; + + p = _pwrdm_deps_lookup(pwrdm2, pwrdm1->sleepdep_srcs); + if (IS_ERR(p)) { + pr_debug("powerdomain: hardware cannot set/clear sleep " + "dependency affecting %s from %s\n", pwrdm1->name, + pwrdm2->name); + return IS_ERR(p); + } + + pr_debug("powerdomain: will prevent %s from sleeping if %s is active\n", + pwrdm1->name, pwrdm2->name); + + cm_set_mod_reg_bits((1 << pwrdm2->dep_bit), + pwrdm1->prcm_offs, OMAP3430_CM_SLEEPDEP); + + return 0; +} + +/** + * pwrdm_del_sleepdep - remove a sleep dependency from pwrdm2 to pwrdm1 + * @pwrdm1: prevent this struct powerdomain * from sleeping (dependent) + * @pwrdm2: when this struct powerdomain * is active (source) + * + * Allow pwrdm1 to automatically go inactive (and then to retention or + * off), independent of the activity state of pwrdm2. Returns -EINVAL + * if presented with invalid powerdomain pointers or called on a machine + * that does not support software-configurable hardware sleep dependencies, + * -ENOENT if the specified dependency cannot be cleared in hardware, or + * 0 upon success. + */ +int pwrdm_del_sleepdep(struct powerdomain *pwrdm1, struct powerdomain *pwrdm2) +{ + struct powerdomain *p; + + if (!pwrdm1) + return -EINVAL; + + if (!cpu_is_omap34xx()) + return -EINVAL; + + p = _pwrdm_deps_lookup(pwrdm2, pwrdm1->sleepdep_srcs); + if (IS_ERR(p)) { + pr_debug("powerdomain: hardware cannot set/clear sleep " + "dependency affecting %s from %s\n", pwrdm1->name, + pwrdm2->name); + return IS_ERR(p); + } + + pr_debug("powerdomain: will no longer prevent %s from sleeping if " + "%s is active\n", pwrdm1->name, pwrdm2->name); + + cm_clear_mod_reg_bits((1 << pwrdm2->dep_bit), + pwrdm1->prcm_offs, OMAP3430_CM_SLEEPDEP); + + return 0; +} + +/** + * pwrdm_read_sleepdep - read sleep dependency state from pwrdm2 to pwrdm1 + * @pwrdm1: prevent this struct powerdomain * from sleeping (dependent) + * @pwrdm2: when this struct powerdomain * is active (source) + * + * Return 1 if a hardware sleep dependency exists wherein pwrdm1 will + * not be allowed to automatically go inactive if pwrdm2 is active; + * 0 if pwrdm1's automatic power state inactivity transition is independent + * of pwrdm2's; -EINVAL if either powerdomain pointer is invalid or called + * on a machine that does not support software-configurable hardware sleep + * dependencies; or -ENOENT if the hardware is incapable. + * + * REVISIT: Currently this function only represents software-controllable + * sleep dependencies. Sleep dependencies fixed in hardware are not + * yet handled here. + */ +int pwrdm_read_sleepdep(struct powerdomain *pwrdm1, struct powerdomain *pwrdm2) +{ + struct powerdomain *p; + + if (!pwrdm1) + return -EINVAL; + + if (!cpu_is_omap34xx()) + return -EINVAL; + + p = _pwrdm_deps_lookup(pwrdm2, pwrdm1->sleepdep_srcs); + if (IS_ERR(p)) { + pr_debug("powerdomain: hardware cannot set/clear sleep " + "dependency affecting %s from %s\n", pwrdm1->name, + pwrdm2->name); + return IS_ERR(p); + } + + return prm_read_mod_bits_shift(pwrdm1->prcm_offs, OMAP3430_CM_SLEEPDEP, + (1 << pwrdm2->dep_bit)); +} + +/** + * pwrdm_get_mem_bank_count - get number of memory banks in this powerdomain + * @pwrdm: struct powerdomain * + * + * Return the number of controllable memory banks in powerdomain pwrdm, + * starting with 1. Returns -EINVAL if the powerdomain pointer is null. + */ +int pwrdm_get_mem_bank_count(struct powerdomain *pwrdm) +{ + if (!pwrdm) + return -EINVAL; + + return pwrdm->banks; +} + +/** + * pwrdm_set_next_pwrst - set next powerdomain power state + * @pwrdm: struct powerdomain * to set + * @pwrst: one of the PWRDM_POWER_* macros + * + * Set the powerdomain pwrdm's next power state to pwrst. The powerdomain + * may not enter this state immediately if the preconditions for this state + * have not been satisfied. Returns -EINVAL if the powerdomain pointer is + * null or if the power state is invalid for the powerdomin, or returns 0 + * upon success. + */ +int pwrdm_set_next_pwrst(struct powerdomain *pwrdm, u8 pwrst) +{ + if (!pwrdm) + return -EINVAL; + + if (!(pwrdm->pwrsts & (1 << pwrst))) + return -EINVAL; + + pr_debug("powerdomain: setting next powerstate for %s to %0x\n", + pwrdm->name, pwrst); + + prm_rmw_mod_reg_bits(OMAP_POWERSTATE_MASK, + (pwrst << OMAP_POWERSTATE_SHIFT), + pwrdm->prcm_offs, PM_PWSTCTRL); + + return 0; +} + +/** + * pwrdm_read_next_pwrst - get next powerdomain power state + * @pwrdm: struct powerdomain * to get power state + * + * Return the powerdomain pwrdm's next power state. Returns -EINVAL + * if the powerdomain pointer is null or returns the next power state + * upon success. + */ +int pwrdm_read_next_pwrst(struct powerdomain *pwrdm) +{ + if (!pwrdm) + return -EINVAL; + + return prm_read_mod_bits_shift(pwrdm->prcm_offs, PM_PWSTCTRL, + OMAP_POWERSTATE_MASK); +} + +/** + * pwrdm_read_pwrst - get current powerdomain power state + * @pwrdm: struct powerdomain * to get power state + * + * Return the powerdomain pwrdm's current power state. Returns -EINVAL + * if the powerdomain pointer is null or returns the current power state + * upon success. + */ +int pwrdm_read_pwrst(struct powerdomain *pwrdm) +{ + if (!pwrdm) + return -EINVAL; + + return prm_read_mod_bits_shift(pwrdm->prcm_offs, PM_PWSTST, + OMAP_POWERSTATEST_MASK); +} + +/** + * pwrdm_read_prev_pwrst - get previous powerdomain power state + * @pwrdm: struct powerdomain * to get previous power state + * + * Return the powerdomain pwrdm's previous power state. Returns -EINVAL + * if the powerdomain pointer is null or returns the previous power state + * upon success. + */ +int pwrdm_read_prev_pwrst(struct powerdomain *pwrdm) +{ + if (!pwrdm) + return -EINVAL; + + return prm_read_mod_bits_shift(pwrdm->prcm_offs, OMAP3430_PM_PREPWSTST, + OMAP3430_LASTPOWERSTATEENTERED_MASK); +} + +/** + * pwrdm_set_logic_retst - set powerdomain logic power state upon retention + * @pwrdm: struct powerdomain * to set + * @pwrst: one of the PWRDM_POWER_* macros + * + * Set the next power state that the logic portion of the powerdomain + * pwrdm will enter when the powerdomain enters retention. This will + * be either RETENTION or OFF, if supported. Returns -EINVAL if the + * powerdomain pointer is null or the target power state is not not + * supported, or returns 0 upon success. + */ +int pwrdm_set_logic_retst(struct powerdomain *pwrdm, u8 pwrst) +{ + if (!pwrdm) + return -EINVAL; + + if (!(pwrdm->pwrsts_logic_ret & (1 << pwrst))) + return -EINVAL; + + pr_debug("powerdomain: setting next logic powerstate for %s to %0x\n", + pwrdm->name, pwrst); + + /* + * The register bit names below may not correspond to the + * actual names of the bits in each powerdomain's register, + * but the type of value returned is the same for each + * powerdomain. + */ + prm_rmw_mod_reg_bits(OMAP3430_LOGICL1CACHERETSTATE, + (pwrst << __ffs(OMAP3430_LOGICL1CACHERETSTATE)), + pwrdm->prcm_offs, PM_PWSTCTRL); + + return 0; +} + +/** + * pwrdm_set_mem_onst - set memory power state while powerdomain ON + * @pwrdm: struct powerdomain * to set + * @bank: memory bank number to set (0-3) + * @pwrst: one of the PWRDM_POWER_* macros + * + * Set the next power state that memory bank x of the powerdomain + * pwrdm will enter when the powerdomain enters the ON state. Bank + * will be a number from 0 to 3, and represents different types of + * memory, depending on the powerdomain. Returns -EINVAL if the + * powerdomain pointer is null or the target power state is not not + * supported for this memory bank, -EEXIST if the target memory bank + * does not exist or is not controllable, or returns 0 upon success. + */ +int pwrdm_set_mem_onst(struct powerdomain *pwrdm, u8 bank, u8 pwrst) +{ + u32 m; + + if (!pwrdm) + return -EINVAL; + + if (pwrdm->banks < (bank + 1)) + return -EEXIST; + + if (!(pwrdm->pwrsts_mem_on[bank] & (1 << pwrst))) + return -EINVAL; + + pr_debug("powerdomain: setting next memory powerstate for domain %s " + "bank %0x while pwrdm-ON to %0x\n", pwrdm->name, bank, pwrst); + + /* + * The register bit names below may not correspond to the + * actual names of the bits in each powerdomain's register, + * but the type of value returned is the same for each + * powerdomain. + */ + switch (bank) { + case 0: + m = OMAP3430_SHAREDL1CACHEFLATONSTATE_MASK; + break; + case 1: + m = OMAP3430_L1FLATMEMONSTATE_MASK; + break; + case 2: + m = OMAP3430_SHAREDL2CACHEFLATONSTATE_MASK; + break; + case 3: + m = OMAP3430_L2FLATMEMONSTATE_MASK; + break; + default: + WARN_ON(1); /* should never happen */ + return -EEXIST; + } + + prm_rmw_mod_reg_bits(m, (pwrst << __ffs(m)), + pwrdm->prcm_offs, PM_PWSTCTRL); + + return 0; +} + +/** + * pwrdm_set_mem_retst - set memory power state while powerdomain in RET + * @pwrdm: struct powerdomain * to set + * @bank: memory bank number to set (0-3) + * @pwrst: one of the PWRDM_POWER_* macros + * + * Set the next power state that memory bank x of the powerdomain + * pwrdm will enter when the powerdomain enters the RETENTION state. + * Bank will be a number from 0 to 3, and represents different types + * of memory, depending on the powerdomain. pwrst will be either + * RETENTION or OFF, if supported. Returns -EINVAL if the powerdomain + * pointer is null or the target power state is not not supported for + * this memory bank, -EEXIST if the target memory bank does not exist + * or is not controllable, or returns 0 upon success. + */ +int pwrdm_set_mem_retst(struct powerdomain *pwrdm, u8 bank, u8 pwrst) +{ + u32 m; + + if (!pwrdm) + return -EINVAL; + + if (pwrdm->banks < (bank + 1)) + return -EEXIST; + + if (!(pwrdm->pwrsts_mem_ret[bank] & (1 << pwrst))) + return -EINVAL; + + pr_debug("powerdomain: setting next memory powerstate for domain %s " + "bank %0x while pwrdm-RET to %0x\n", pwrdm->name, bank, pwrst); + + /* + * The register bit names below may not correspond to the + * actual names of the bits in each powerdomain's register, + * but the type of value returned is the same for each + * powerdomain. + */ + switch (bank) { + case 0: + m = OMAP3430_SHAREDL1CACHEFLATRETSTATE; + break; + case 1: + m = OMAP3430_L1FLATMEMRETSTATE; + break; + case 2: + m = OMAP3430_SHAREDL2CACHEFLATRETSTATE; + break; + case 3: + m = OMAP3430_L2FLATMEMRETSTATE; + break; + default: + WARN_ON(1); /* should never happen */ + return -EEXIST; + } + + prm_rmw_mod_reg_bits(m, (pwrst << __ffs(m)), pwrdm->prcm_offs, + PM_PWSTCTRL); + + return 0; +} + +/** + * pwrdm_read_logic_pwrst - get current powerdomain logic retention power state + * @pwrdm: struct powerdomain * to get current logic retention power state + * + * Return the current power state that the logic portion of + * powerdomain pwrdm will enter + * Returns -EINVAL if the powerdomain pointer is null or returns the + * current logic retention power state upon success. + */ +int pwrdm_read_logic_pwrst(struct powerdomain *pwrdm) +{ + if (!pwrdm) + return -EINVAL; + + return prm_read_mod_bits_shift(pwrdm->prcm_offs, PM_PWSTST, + OMAP3430_LOGICSTATEST); +} + +/** + * pwrdm_read_prev_logic_pwrst - get previous powerdomain logic power state + * @pwrdm: struct powerdomain * to get previous logic power state + * + * Return the powerdomain pwrdm's logic power state. Returns -EINVAL + * if the powerdomain pointer is null or returns the previous logic + * power state upon success. + */ +int pwrdm_read_prev_logic_pwrst(struct powerdomain *pwrdm) +{ + if (!pwrdm) + return -EINVAL; + + /* + * The register bit names below may not correspond to the + * actual names of the bits in each powerdomain's register, + * but the type of value returned is the same for each + * powerdomain. + */ + return prm_read_mod_bits_shift(pwrdm->prcm_offs, OMAP3430_PM_PREPWSTST, + OMAP3430_LASTLOGICSTATEENTERED); +} + +/** + * pwrdm_read_mem_pwrst - get current memory bank power state + * @pwrdm: struct powerdomain * to get current memory bank power state + * @bank: memory bank number (0-3) + * + * Return the powerdomain pwrdm's current memory power state for bank + * x. Returns -EINVAL if the powerdomain pointer is null, -EEXIST if + * the target memory bank does not exist or is not controllable, or + * returns the current memory power state upon success. + */ +int pwrdm_read_mem_pwrst(struct powerdomain *pwrdm, u8 bank) +{ + u32 m; + + if (!pwrdm) + return -EINVAL; + + if (pwrdm->banks < (bank + 1)) + return -EEXIST; + + /* + * The register bit names below may not correspond to the + * actual names of the bits in each powerdomain's register, + * but the type of value returned is the same for each + * powerdomain. + */ + switch (bank) { + case 0: + m = OMAP3430_SHAREDL1CACHEFLATSTATEST_MASK; + break; + case 1: + m = OMAP3430_L1FLATMEMSTATEST_MASK; + break; + case 2: + m = OMAP3430_SHAREDL2CACHEFLATSTATEST_MASK; + break; + case 3: + m = OMAP3430_L2FLATMEMSTATEST_MASK; + break; + default: + WARN_ON(1); /* should never happen */ + return -EEXIST; + } + + return prm_read_mod_bits_shift(pwrdm->prcm_offs, PM_PWSTST, m); +} + +/** + * pwrdm_read_prev_mem_pwrst - get previous memory bank power state + * @pwrdm: struct powerdomain * to get previous memory bank power state + * @bank: memory bank number (0-3) + * + * Return the powerdomain pwrdm's previous memory power state for bank + * x. Returns -EINVAL if the powerdomain pointer is null, -EEXIST if + * the target memory bank does not exist or is not controllable, or + * returns the previous memory power state upon success. + */ +int pwrdm_read_prev_mem_pwrst(struct powerdomain *pwrdm, u8 bank) +{ + u32 m; + + if (!pwrdm) + return -EINVAL; + + if (pwrdm->banks < (bank + 1)) + return -EEXIST; + + /* + * The register bit names below may not correspond to the + * actual names of the bits in each powerdomain's register, + * but the type of value returned is the same for each + * powerdomain. + */ + switch (bank) { + case 0: + m = OMAP3430_LASTMEM1STATEENTERED_MASK; + break; + case 1: + m = OMAP3430_LASTMEM2STATEENTERED_MASK; + break; + case 2: + m = OMAP3430_LASTSHAREDL2CACHEFLATSTATEENTERED_MASK; + break; + case 3: + m = OMAP3430_LASTL2FLATMEMSTATEENTERED_MASK; + break; + default: + WARN_ON(1); /* should never happen */ + return -EEXIST; + } + + return prm_read_mod_bits_shift(pwrdm->prcm_offs, + OMAP3430_PM_PREPWSTST, m); +} + +/** + * pwrdm_clear_all_prev_pwrst - clear previous powerstate register for a pwrdm + * @pwrdm: struct powerdomain * to clear + * + * Clear the powerdomain's previous power state register. Clears the + * entire register, including logic and memory bank previous power states. + * Returns -EINVAL if the powerdomain pointer is null, or returns 0 upon + * success. + */ +int pwrdm_clear_all_prev_pwrst(struct powerdomain *pwrdm) +{ + if (!pwrdm) + return -EINVAL; + + /* + * XXX should get the powerdomain's current state here; + * warn & fail if it is not ON. + */ + + pr_debug("powerdomain: clearing previous power state reg for %s\n", + pwrdm->name); + + prm_write_mod_reg(0, pwrdm->prcm_offs, OMAP3430_PM_PREPWSTST); + + return 0; +} + +/** + * pwrdm_wait_transition - wait for powerdomain power transition to finish + * @pwrdm: struct powerdomain * to wait for + * + * If the powerdomain pwrdm is in the process of a state transition, + * spin until it completes the power transition, or until an iteration + * bailout value is reached. Returns -EINVAL if the powerdomain + * pointer is null, -EAGAIN if the bailout value was reached, or + * returns 0 upon success. + */ +int pwrdm_wait_transition(struct powerdomain *pwrdm) +{ + u32 c = 0; + + if (!pwrdm) + return -EINVAL; + + /* + * REVISIT: pwrdm_wait_transition() may be better implemented + * via a callback and a periodic timer check -- how long do we expect + * powerdomain transitions to take? + */ + + /* XXX Is this udelay() value meaningful? */ + while ((prm_read_mod_reg(pwrdm->prcm_offs, PM_PWSTST) & + OMAP_INTRANSITION) && + (c++ < PWRDM_TRANSITION_BAILOUT)) + udelay(1); + + if (c >= PWRDM_TRANSITION_BAILOUT) { + printk(KERN_ERR "powerdomain: waited too long for " + "powerdomain %s to complete transition\n", pwrdm->name); + return -EAGAIN; + } + + pr_debug("powerdomain: completed transition in %d loops\n", c); + + return 0; +} + + diff --git a/arch/arm/plat-omap/Kconfig b/arch/arm/plat-omap/Kconfig index b917206..e815fa3 100644 --- a/arch/arm/plat-omap/Kconfig +++ b/arch/arm/plat-omap/Kconfig @@ -29,6 +29,18 @@ config OMAP_DEBUG_LEDS depends on OMAP_DEBUG_DEVICES default y if LEDS || LEDS_OMAP_DEBUG +config OMAP_DEBUG_POWERDOMAIN + bool "Emit debug messages from powerdomain layer" + depends on ARCH_OMAP2 || ARCH_OMAP3 + default n + help + Say Y here if you want to compile in powerdomain layer + debugging messages for OMAP2/3. These messages can + provide more detail as to why some powerdomain calls + may be failing, and will also emit a descriptive message + for every powerdomain register write. However, the + extra detail costs some memory. + config OMAP_RESET_CLOCKS bool "Reset unused clocks during boot" depends on ARCH_OMAP diff --git a/arch/arm/plat-omap/include/mach/powerdomain.h b/arch/arm/plat-omap/include/mach/powerdomain.h new file mode 100644 index 0000000..c8f52c6 --- /dev/null +++ b/arch/arm/plat-omap/include/mach/powerdomain.h @@ -0,0 +1,139 @@ +/* + * OMAP2/3 powerdomain control + * + * Copyright (C) 2007-8 Texas Instruments, Inc. + * Copyright (C) 2007-8 Nokia Corporation + * + * Written by Paul Walmsley + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef ASM_ARM_ARCH_OMAP_POWERDOMAIN +#define ASM_ARM_ARCH_OMAP_POWERDOMAIN + +#include +#include + +#include + +#include + + +/* Powerdomain basic power states */ +#define PWRDM_POWER_OFF 0x0 +#define PWRDM_POWER_RET 0x1 +#define PWRDM_POWER_INACTIVE 0x2 +#define PWRDM_POWER_ON 0x3 + +/* Powerdomain allowable state bitfields */ +#define PWRSTS_OFF_ON ((1 << PWRDM_POWER_OFF) | \ + (1 << PWRDM_POWER_ON)) + +#define PWRSTS_OFF_RET ((1 << PWRDM_POWER_OFF) | \ + (1 << PWRDM_POWER_RET)) + +#define PWRSTS_OFF_RET_ON (PWRSTS_OFF_RET | (1 << PWRDM_POWER_ON)) + + +/* + * Number of memory banks that are power-controllable. On OMAP3430, the + * maximum is 4. + */ +#define PWRDM_MAX_MEM_BANKS 4 + +/* XXX A completely arbitrary number. What is reasonable here? */ +#define PWRDM_TRANSITION_BAILOUT 100000 + +struct powerdomain; + +/* Encodes dependencies between powerdomains - statically defined */ +struct pwrdm_dep { + + /* Powerdomain name */ + const char *pwrdm_name; + + /* Powerdomain pointer - resolved by the powerdomain code */ + struct powerdomain *pwrdm; + + /* Flags to mark OMAP chip restrictions, etc. */ + const struct omap_chip_id omap_chip; + +}; + +struct powerdomain { + + /* Powerdomain name */ + const char *name; + + /* the address offset from CM_BASE/PRM_BASE */ + const s16 prcm_offs; + + /* Used to represent the OMAP chip types containing this pwrdm */ + const struct omap_chip_id omap_chip; + + /* Bit shift of this powerdomain's PM_WKDEP/CM_SLEEPDEP bit */ + const u8 dep_bit; + + /* Powerdomains that can be told to wake this powerdomain up */ + struct pwrdm_dep *wkdep_srcs; + + /* Powerdomains that can be told to keep this pwrdm from inactivity */ + struct pwrdm_dep *sleepdep_srcs; + + /* Possible powerdomain power states */ + const u8 pwrsts; + + /* Possible logic power states when pwrdm in RETENTION */ + const u8 pwrsts_logic_ret; + + /* Number of software-controllable memory banks in this powerdomain */ + const u8 banks; + + /* Possible memory bank pwrstates when pwrdm in RETENTION */ + const u8 pwrsts_mem_ret[PWRDM_MAX_MEM_BANKS]; + + /* Possible memory bank pwrstates when pwrdm is ON */ + const u8 pwrsts_mem_on[PWRDM_MAX_MEM_BANKS]; + + struct list_head node; + +}; + + +void pwrdm_init(struct powerdomain **pwrdm_list); + +int pwrdm_register(struct powerdomain *pwrdm); +int pwrdm_unregister(struct powerdomain *pwrdm); +struct powerdomain *pwrdm_lookup(const char *name); + +int pwrdm_for_each(int (*fn)(struct powerdomain *pwrdm)); + +int pwrdm_add_wkdep(struct powerdomain *pwrdm1, struct powerdomain *pwrdm2); +int pwrdm_del_wkdep(struct powerdomain *pwrdm1, struct powerdomain *pwrdm2); +int pwrdm_read_wkdep(struct powerdomain *pwrdm1, struct powerdomain *pwrdm2); +int pwrdm_add_sleepdep(struct powerdomain *pwrdm1, struct powerdomain *pwrdm2); +int pwrdm_del_sleepdep(struct powerdomain *pwrdm1, struct powerdomain *pwrdm2); +int pwrdm_read_sleepdep(struct powerdomain *pwrdm1, struct powerdomain *pwrdm2); + +int pwrdm_get_mem_bank_count(struct powerdomain *pwrdm); + +int pwrdm_set_next_pwrst(struct powerdomain *pwrdm, u8 pwrst); +int pwrdm_read_next_pwrst(struct powerdomain *pwrdm); +int pwrdm_read_prev_pwrst(struct powerdomain *pwrdm); +int pwrdm_clear_all_prev_pwrst(struct powerdomain *pwrdm); + +int pwrdm_set_logic_retst(struct powerdomain *pwrdm, u8 pwrst); +int pwrdm_set_mem_onst(struct powerdomain *pwrdm, u8 bank, u8 pwrst); +int pwrdm_set_mem_retst(struct powerdomain *pwrdm, u8 bank, u8 pwrst); + +int pwrdm_read_logic_pwrst(struct powerdomain *pwrdm); +int pwrdm_read_prev_logic_pwrst(struct powerdomain *pwrdm); +int pwrdm_read_mem_pwrst(struct powerdomain *pwrdm, u8 bank); +int pwrdm_read_prev_mem_pwrst(struct powerdomain *pwrdm, u8 bank); + +int pwrdm_wait_transition(struct powerdomain *pwrdm); + +#endif -- cgit v0.10.2 From 9717100f77538bbee54d2b5c293fd829b252d2a6 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Tue, 19 Aug 2008 11:08:40 +0300 Subject: ARM: OMAP2: Powerdomain: Add OMAP2/3 common powerdomains Add powerdomains common to both OMAP2 and OMAP3 (WKUP and GFX/SGX). Modify mach-omap2/io.c to initialize the powerdomain code on boot. Signed-off-by: Paul Walmsley Signed-off-by: Tony Lindgren diff --git a/arch/arm/mach-omap2/io.c b/arch/arm/mach-omap2/io.c index 987351f..5f73cf0 100644 --- a/arch/arm/mach-omap2/io.c +++ b/arch/arm/mach-omap2/io.c @@ -24,6 +24,10 @@ #include #include +#include + +#include "powerdomains.h" + extern void omap_sram_init(void); extern int omap2_clk_init(void); extern void omap2_check_revision(void); @@ -101,6 +105,7 @@ void __init omap2_map_common_io(void) void __init omap2_init_common_hw(void) { omap2_mux_init(); + pwrdm_init(powerdomains_omap); omap2_clk_init(); /* * Need to Fix this for 2430 diff --git a/arch/arm/mach-omap2/powerdomains.h b/arch/arm/mach-omap2/powerdomains.h new file mode 100644 index 0000000..1936df2 --- /dev/null +++ b/arch/arm/mach-omap2/powerdomains.h @@ -0,0 +1,147 @@ +/* + * OMAP2/3 common powerdomain definitions + * + * Copyright (C) 2007-8 Texas Instruments, Inc. + * Copyright (C) 2007-8 Nokia Corporation + * + * Written by Paul Walmsley + * Debugging and integration fixes by Jouni Högander + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef ARCH_ARM_MACH_OMAP2_POWERDOMAINS +#define ARCH_ARM_MACH_OMAP2_POWERDOMAINS + +/* + * This file contains all of the powerdomains that have some element + * of software control for the OMAP24xx and OMAP34XX chips. + * + * A few notes: + * + * This is not an exhaustive listing of powerdomains on the chips; only + * powerdomains that can be controlled in software. + * + * A useful validation rule for struct powerdomain: + * Any powerdomain referenced by a wkdep_srcs or sleepdep_srcs array + * must have a dep_bit assigned. So wkdep_srcs/sleepdep_srcs are really + * just software-controllable dependencies. Non-software-controllable + * dependencies do exist, but they are not encoded below (yet). + * + * 24xx does not support programmable sleep dependencies (SLEEPDEP) + * + */ + +/* + * The names for the DSP/IVA2 powerdomains are confusing. + * + * Most OMAP chips have an on-board DSP. + * + * On the 2420, this is a 'C55 DSP called, simply, the DSP. Its + * powerdomain is called the "DSP power domain." On the 2430, the + * on-board DSP is a 'C64 DSP, now called the IVA2 or IVA2.1. Its + * powerdomain is still called the "DSP power domain." On the 3430, + * the DSP is a 'C64 DSP like the 2430, also known as the IVA2; but + * its powerdomain is now called the "IVA2 power domain." + * + * The 2420 also has something called the IVA, which is a separate ARM + * core, and has nothing to do with the DSP/IVA2. + * + * Ideally the DSP/IVA2 could just be the same powerdomain, but the PRCM + * address offset is different between the C55 and C64 DSPs. + * + * The overly-specific dep_bit names are due to a bit name collision + * with CM_FCLKEN_{DSP,IVA2}. The DSP/IVA2 PM_WKDEP and CM_SLEEPDEP shift + * value are the same for all powerdomains: 2 + */ + +/* + * XXX should dep_bit be a mask, so we can test to see if it is 0 as a + * sanity check? + * XXX encode hardware fixed wakeup dependencies -- esp. for 3430 CORE + */ + +#include + +#include "prcm-common.h" +#include "prm.h" +#include "cm.h" + +/* OMAP2/3-common powerdomains and wakeup dependencies */ + +/* + * 2420/2430 PM_WKDEP_GFX: CORE, MPU, WKUP + * 3430ES1 PM_WKDEP_GFX: adds IVA2, removes CORE + * 3430ES2 PM_WKDEP_SGX: adds IVA2, removes CORE + */ +static struct pwrdm_dep gfx_sgx_wkdeps[] = { + { + .pwrdm_name = "core_pwrdm", + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP24XX) + }, + { + .pwrdm_name = "iva2_pwrdm", + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP3430) + }, + { + .pwrdm_name = "mpu_pwrdm", + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP24XX | + CHIP_IS_OMAP3430) + }, + { + .pwrdm_name = "wkup_pwrdm", + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP24XX | + CHIP_IS_OMAP3430) + }, + { NULL }, +}; + +/* + * OMAP2/3 common powerdomains + */ + +/* XXX add sleepdeps for this powerdomain : 3430 */ + +/* + * The GFX powerdomain is not present on 3430ES2, but currently we do not + * have a macro to filter it out at compile-time. + */ +static struct powerdomain gfx_pwrdm = { + .name = "gfx_pwrdm", + .prcm_offs = GFX_MOD, + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP24XX | + CHIP_IS_OMAP3430ES1), + .wkdep_srcs = gfx_sgx_wkdeps, + .pwrsts = PWRSTS_OFF_RET_ON, + .pwrsts_logic_ret = PWRDM_POWER_RET, + .banks = 1, + .pwrsts_mem_ret = { + [0] = PWRDM_POWER_RET, /* MEMRETSTATE */ + }, + .pwrsts_mem_on = { + [0] = PWRDM_POWER_ON, /* MEMONSTATE */ + }, +}; + +static struct powerdomain wkup_pwrdm = { + .name = "wkup_pwrdm", + .prcm_offs = WKUP_MOD, + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP24XX | CHIP_IS_OMAP3430), + .dep_bit = OMAP_EN_WKUP_SHIFT, +}; + + + +/* As powerdomains are added or removed above, this list must also be changed */ +static struct powerdomain *powerdomains_omap[] __initdata = { + + &gfx_pwrdm, + &wkup_pwrdm, + + NULL +}; + + +#endif diff --git a/arch/arm/mach-omap2/prm.h b/arch/arm/mach-omap2/prm.h index bbf41fc..eb9982f 100644 --- a/arch/arm/mach-omap2/prm.h +++ b/arch/arm/mach-omap2/prm.h @@ -305,7 +305,8 @@ static inline u32 prm_clear_mod_reg_bits(u32 bits, s16 module, s16 idx) * 3430: PM_WKDEP_IVA2, PM_WKDEP_GFX, PM_WKDEP_DSS, PM_WKDEP_CAM, * PM_WKDEP_PER */ -#define OMAP_EN_WKUP (1 << 4) +#define OMAP_EN_WKUP_SHIFT 4 +#define OMAP_EN_WKUP_MASK (1 << 4) /* * 24XX: PM_PWSTCTRL_MPU, PM_PWSTCTRL_CORE, PM_PWSTCTRL_GFX, -- cgit v0.10.2 From fe6a58f8f50500a4c9a82da4a9bdae41c1500fa0 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Tue, 19 Aug 2008 11:08:42 +0300 Subject: ARM: OMAP2: Powerdomain: Add OMAP2 powerdomains Add OMAP2-specific powerdomains. Signed-off-by: Paul Walmsley Signed-off-by: Tony Lindgren diff --git a/arch/arm/mach-omap2/powerdomains.h b/arch/arm/mach-omap2/powerdomains.h index 1936df2..325e2ba 100644 --- a/arch/arm/mach-omap2/powerdomains.h +++ b/arch/arm/mach-omap2/powerdomains.h @@ -98,6 +98,10 @@ static struct pwrdm_dep gfx_sgx_wkdeps[] = { { NULL }, }; + +#include "powerdomains24xx.h" + + /* * OMAP2/3 common powerdomains */ @@ -140,6 +144,16 @@ static struct powerdomain *powerdomains_omap[] __initdata = { &gfx_pwrdm, &wkup_pwrdm, +#ifdef CONFIG_ARCH_OMAP24XX + &dsp_pwrdm, + &mpu_24xx_pwrdm, + &core_24xx_pwrdm, +#endif + +#ifdef CONFIG_ARCH_OMAP2430 + &mdm_pwrdm, +#endif + NULL }; diff --git a/arch/arm/mach-omap2/powerdomains24xx.h b/arch/arm/mach-omap2/powerdomains24xx.h new file mode 100644 index 0000000..9f08dc3 --- /dev/null +++ b/arch/arm/mach-omap2/powerdomains24xx.h @@ -0,0 +1,200 @@ +/* + * OMAP24XX powerdomain definitions + * + * Copyright (C) 2007-2008 Texas Instruments, Inc. + * Copyright (C) 2007-2008 Nokia Corporation + * + * Written by Paul Walmsley + * Debugging and integration fixes by Jouni Högander + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef ARCH_ARM_MACH_OMAP2_POWERDOMAINS24XX +#define ARCH_ARM_MACH_OMAP2_POWERDOMAINS24XX + +/* + * N.B. If powerdomains are added or removed from this file, update + * the array in mach-omap2/powerdomains.h. + */ + +#include + +#include "prcm-common.h" +#include "prm.h" +#include "prm-regbits-24xx.h" +#include "cm.h" +#include "cm-regbits-24xx.h" + +/* 24XX powerdomains and dependencies */ + +#ifdef CONFIG_ARCH_OMAP24XX + + +/* Wakeup dependency source arrays */ + +/* + * 2420/2430 PM_WKDEP_DSP: CORE, MPU, WKUP + * 2430 PM_WKDEP_MDM: same as above + */ +static struct pwrdm_dep dsp_mdm_24xx_wkdeps[] = { + { + .pwrdm_name = "core_pwrdm", + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP24XX) + }, + { + .pwrdm_name = "mpu_pwrdm", + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP24XX) + }, + { + .pwrdm_name = "wkup_pwrdm", + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP24XX) + }, + { NULL }, +}; + +/* + * 2420 PM_WKDEP_MPU: CORE, DSP, WKUP + * 2430 adds MDM + */ +static struct pwrdm_dep mpu_24xx_wkdeps[] = { + { + .pwrdm_name = "core_pwrdm", + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP24XX) + }, + { + .pwrdm_name = "dsp_pwrdm", + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP24XX) + }, + { + .pwrdm_name = "wkup_pwrdm", + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP24XX) + }, + { + .pwrdm_name = "mdm_pwrdm", + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP2430) + }, + { NULL }, +}; + +/* + * 2420 PM_WKDEP_CORE: DSP, GFX, MPU, WKUP + * 2430 adds MDM + */ +static struct pwrdm_dep core_24xx_wkdeps[] = { + { + .pwrdm_name = "dsp_pwrdm", + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP24XX) + }, + { + .pwrdm_name = "gfx_pwrdm", + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP24XX) + }, + { + .pwrdm_name = "mpu_pwrdm", + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP24XX) + }, + { + .pwrdm_name = "wkup_pwrdm", + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP24XX) + }, + { + .pwrdm_name = "mdm_pwrdm", + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP2430) + }, + { NULL }, +}; + + +/* Powerdomains */ + +static struct powerdomain dsp_pwrdm = { + .name = "dsp_pwrdm", + .prcm_offs = OMAP24XX_DSP_MOD, + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP24XX), + .dep_bit = OMAP24XX_PM_WKDEP_MPU_EN_DSP_SHIFT, + .wkdep_srcs = dsp_mdm_24xx_wkdeps, + .pwrsts = PWRSTS_OFF_RET_ON, + .pwrsts_logic_ret = PWRDM_POWER_RET, + .banks = 1, + .pwrsts_mem_ret = { + [0] = PWRDM_POWER_RET, + }, + .pwrsts_mem_on = { + [0] = PWRDM_POWER_ON, + }, +}; + +static struct powerdomain mpu_24xx_pwrdm = { + .name = "mpu_pwrdm", + .prcm_offs = MPU_MOD, + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP24XX), + .dep_bit = OMAP24XX_EN_MPU_SHIFT, + .wkdep_srcs = mpu_24xx_wkdeps, + .pwrsts = PWRSTS_OFF_RET_ON, + .pwrsts_logic_ret = PWRSTS_OFF_RET, + .banks = 1, + .pwrsts_mem_ret = { + [0] = PWRDM_POWER_RET, + }, + .pwrsts_mem_on = { + [0] = PWRDM_POWER_ON, + }, +}; + +static struct powerdomain core_24xx_pwrdm = { + .name = "core_pwrdm", + .prcm_offs = CORE_MOD, + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP24XX), + .wkdep_srcs = core_24xx_wkdeps, + .pwrsts = PWRSTS_OFF_RET_ON, + .dep_bit = OMAP24XX_EN_CORE_SHIFT, + .banks = 3, + .pwrsts_mem_ret = { + [0] = PWRSTS_OFF_RET, /* MEM1RETSTATE */ + [1] = PWRSTS_OFF_RET, /* MEM2RETSTATE */ + [2] = PWRSTS_OFF_RET, /* MEM3RETSTATE */ + }, + .pwrsts_mem_on = { + [0] = PWRSTS_OFF_RET_ON, /* MEM1ONSTATE */ + [1] = PWRSTS_OFF_RET_ON, /* MEM2ONSTATE */ + [2] = PWRSTS_OFF_RET_ON, /* MEM3ONSTATE */ + }, +}; + +#endif /* CONFIG_ARCH_OMAP24XX */ + + + +/* + * 2430-specific powerdomains + */ + +#ifdef CONFIG_ARCH_OMAP2430 + +/* XXX 2430 KILLDOMAINWKUP bit? No current users apparently */ + +/* Another case of bit name collisions between several registers: EN_MDM */ +static struct powerdomain mdm_pwrdm = { + .name = "mdm_pwrdm", + .prcm_offs = OMAP2430_MDM_MOD, + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP2430), + .dep_bit = OMAP2430_PM_WKDEP_MPU_EN_MDM_SHIFT, + .wkdep_srcs = dsp_mdm_24xx_wkdeps, + .pwrsts = PWRSTS_OFF_RET_ON, + .pwrsts_logic_ret = PWRDM_POWER_RET, + .banks = 1, + .pwrsts_mem_ret = { + [0] = PWRDM_POWER_RET, /* MEMRETSTATE */ + }, + .pwrsts_mem_on = { + [0] = PWRDM_POWER_ON, /* MEMONSTATE */ + }, +}; + +#endif /* CONFIG_ARCH_OMAP2430 */ + + +#endif diff --git a/arch/arm/mach-omap2/prm-regbits-24xx.h b/arch/arm/mach-omap2/prm-regbits-24xx.h index c6d17a3..4002051 100644 --- a/arch/arm/mach-omap2/prm-regbits-24xx.h +++ b/arch/arm/mach-omap2/prm-regbits-24xx.h @@ -29,8 +29,10 @@ #define OMAP24XX_WKUP1_EN (1 << 0) /* PM_WKDEP_GFX, PM_WKDEP_MPU, PM_WKDEP_DSP, PM_WKDEP_MDM shared bits */ -#define OMAP24XX_EN_MPU (1 << 1) -#define OMAP24XX_EN_CORE (1 << 0) +#define OMAP24XX_EN_MPU_SHIFT 1 +#define OMAP24XX_EN_MPU_MASK (1 << 1) +#define OMAP24XX_EN_CORE_SHIFT 0 +#define OMAP24XX_EN_CORE_MASK (1 << 0) /* * PM_PWSTCTRL_MPU, PM_PWSTCTRL_GFX, PM_PWSTCTRL_DSP, PM_PWSTCTRL_MDM @@ -140,8 +142,10 @@ /* 2430 calls GLOBALWMPU_RST "GLOBALWARM_RST" instead */ /* PM_WKDEP_MPU specific bits */ -#define OMAP2430_PM_WKDEP_MPU_EN_MDM (1 << 5) -#define OMAP24XX_PM_WKDEP_MPU_EN_DSP (1 << 2) +#define OMAP2430_PM_WKDEP_MPU_EN_MDM_SHIFT 5 +#define OMAP2430_PM_WKDEP_MPU_EN_MDM_MASK (1 << 5) +#define OMAP24XX_PM_WKDEP_MPU_EN_DSP_SHIFT 2 +#define OMAP24XX_PM_WKDEP_MPU_EN_DSP_MASK (1 << 2) /* PM_EVGENCTRL_MPU specific bits */ -- cgit v0.10.2 From ecb24aa129c6d4b2152571f856320aa7dea41676 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Tue, 19 Aug 2008 11:08:43 +0300 Subject: ARM: OMAP: Powerdomain: Add OMAP3 powerdomains Add OMAP3-specific powerdomains. Signed-off-by: Paul Walmsley Signed-off-by: Tony Lindgren diff --git a/arch/arm/mach-omap2/powerdomains.h b/arch/arm/mach-omap2/powerdomains.h index 325e2ba..1e151fa 100644 --- a/arch/arm/mach-omap2/powerdomains.h +++ b/arch/arm/mach-omap2/powerdomains.h @@ -98,16 +98,28 @@ static struct pwrdm_dep gfx_sgx_wkdeps[] = { { NULL }, }; +/* + * 3430: CM_SLEEPDEP_CAM: MPU + * 3430ES1: CM_SLEEPDEP_GFX: MPU + * 3430ES2: CM_SLEEPDEP_SGX: MPU + */ +static struct pwrdm_dep cam_gfx_sleepdeps[] = { + { + .pwrdm_name = "mpu_pwrdm", + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP3430) + }, + { NULL }, +}; + #include "powerdomains24xx.h" +#include "powerdomains34xx.h" /* * OMAP2/3 common powerdomains */ -/* XXX add sleepdeps for this powerdomain : 3430 */ - /* * The GFX powerdomain is not present on 3430ES2, but currently we do not * have a macro to filter it out at compile-time. @@ -118,6 +130,7 @@ static struct powerdomain gfx_pwrdm = { .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP24XX | CHIP_IS_OMAP3430ES1), .wkdep_srcs = gfx_sgx_wkdeps, + .sleepdep_srcs = cam_gfx_sleepdeps, .pwrsts = PWRSTS_OFF_RET_ON, .pwrsts_logic_ret = PWRDM_POWER_RET, .banks = 1, @@ -154,6 +167,19 @@ static struct powerdomain *powerdomains_omap[] __initdata = { &mdm_pwrdm, #endif +#ifdef CONFIG_ARCH_OMAP34XX + &iva2_pwrdm, + &mpu_34xx_pwrdm, + &neon_pwrdm, + &core_34xx_pwrdm, + &cam_pwrdm, + &dss_pwrdm, + &per_pwrdm, + &emu_pwrdm, + &sgx_pwrdm, + &usbhost_pwrdm, +#endif + NULL }; diff --git a/arch/arm/mach-omap2/powerdomains34xx.h b/arch/arm/mach-omap2/powerdomains34xx.h new file mode 100644 index 0000000..f573f71 --- /dev/null +++ b/arch/arm/mach-omap2/powerdomains34xx.h @@ -0,0 +1,327 @@ +/* + * OMAP34XX powerdomain definitions + * + * Copyright (C) 2007-2008 Texas Instruments, Inc. + * Copyright (C) 2007-2008 Nokia Corporation + * + * Written by Paul Walmsley + * Debugging and integration fixes by Jouni Högander + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef ARCH_ARM_MACH_OMAP2_POWERDOMAINS34XX +#define ARCH_ARM_MACH_OMAP2_POWERDOMAINS34XX + +/* + * N.B. If powerdomains are added or removed from this file, update + * the array in mach-omap2/powerdomains.h. + */ + +#include + +#include "prcm-common.h" +#include "prm.h" +#include "prm-regbits-34xx.h" +#include "cm.h" +#include "cm-regbits-34xx.h" + +/* + * 34XX-specific powerdomains, dependencies + */ + +#ifdef CONFIG_ARCH_OMAP34XX + +/* + * 3430: PM_WKDEP_{PER,USBHOST}: CORE, IVA2, MPU, WKUP + * (USBHOST is ES2 only) + */ +static struct pwrdm_dep per_usbhost_wkdeps[] = { + { + .pwrdm_name = "core_pwrdm", + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP3430) + }, + { + .pwrdm_name = "iva2_pwrdm", + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP3430) + }, + { + .pwrdm_name = "mpu_pwrdm", + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP3430) + }, + { + .pwrdm_name = "wkup_pwrdm", + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP3430) + }, + { NULL }, +}; + +/* + * 3430 PM_WKDEP_MPU: CORE, IVA2, DSS, PER + */ +static struct pwrdm_dep mpu_34xx_wkdeps[] = { + { + .pwrdm_name = "core_pwrdm", + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP3430) + }, + { + .pwrdm_name = "iva2_pwrdm", + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP3430) + }, + { + .pwrdm_name = "dss_pwrdm", + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP3430) + }, + { + .pwrdm_name = "per_pwrdm", + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP3430) + }, + { NULL }, +}; + +/* + * 3430 PM_WKDEP_IVA2: CORE, MPU, WKUP, DSS, PER + */ +static struct pwrdm_dep iva2_wkdeps[] = { + { + .pwrdm_name = "core_pwrdm", + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP3430) + }, + { + .pwrdm_name = "mpu_pwrdm", + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP3430) + }, + { + .pwrdm_name = "wkup_pwrdm", + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP3430) + }, + { + .pwrdm_name = "dss_pwrdm", + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP3430) + }, + { + .pwrdm_name = "per_pwrdm", + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP3430) + }, + { NULL }, +}; + + +/* 3430 PM_WKDEP_{CAM,DSS}: IVA2, MPU, WKUP */ +static struct pwrdm_dep cam_dss_wkdeps[] = { + { + .pwrdm_name = "iva2_pwrdm", + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP3430) + }, + { + .pwrdm_name = "mpu_pwrdm", + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP3430) + }, + { + .pwrdm_name = "wkup_pwrdm", + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP3430) + }, + { NULL }, +}; + +/* 3430: PM_WKDEP_NEON: MPU */ +static struct pwrdm_dep neon_wkdeps[] = { + { + .pwrdm_name = "mpu_pwrdm", + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP3430) + }, + { NULL }, +}; + + +/* Sleep dependency source arrays for 34xx-specific pwrdms - 34XX only */ + +/* + * 3430: CM_SLEEPDEP_{DSS,PER}: MPU, IVA + * 3430ES2: CM_SLEEPDEP_USBHOST: MPU, IVA + */ +static struct pwrdm_dep dss_per_usbhost_sleepdeps[] = { + { + .pwrdm_name = "mpu_pwrdm", + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP3430) + }, + { + .pwrdm_name = "iva2_pwrdm", + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP3430) + }, + { NULL }, +}; + + +/* + * Powerdomains + */ + +static struct powerdomain iva2_pwrdm = { + .name = "iva2_pwrdm", + .prcm_offs = OMAP3430_IVA2_MOD, + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP3430), + .dep_bit = OMAP3430_PM_WKDEP_MPU_EN_IVA2_SHIFT, + .wkdep_srcs = iva2_wkdeps, + .pwrsts = PWRSTS_OFF_RET_ON, + .pwrsts_logic_ret = PWRSTS_OFF_RET, + .banks = 4, + .pwrsts_mem_ret = { + [0] = PWRSTS_OFF_RET, + [1] = PWRSTS_OFF_RET, + [2] = PWRSTS_OFF_RET, + [3] = PWRSTS_OFF_RET, + }, + .pwrsts_mem_on = { + [0] = PWRDM_POWER_ON, + [1] = PWRDM_POWER_ON, + [2] = PWRSTS_OFF_ON, + [3] = PWRDM_POWER_ON, + }, +}; + +static struct powerdomain mpu_34xx_pwrdm = { + .name = "mpu_pwrdm", + .prcm_offs = MPU_MOD, + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP3430), + .dep_bit = OMAP3430_EN_MPU_SHIFT, + .wkdep_srcs = mpu_34xx_wkdeps, + .pwrsts = PWRSTS_OFF_RET_ON, + .pwrsts_logic_ret = PWRSTS_OFF_RET, + .banks = 1, + .pwrsts_mem_ret = { + [0] = PWRSTS_OFF_RET, + }, + .pwrsts_mem_on = { + [0] = PWRSTS_OFF_ON, + }, +}; + +/* No wkdeps or sleepdeps for 34xx core apparently */ +static struct powerdomain core_34xx_pwrdm = { + .name = "core_pwrdm", + .prcm_offs = CORE_MOD, + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP3430), + .pwrsts = PWRSTS_OFF_RET_ON, + .dep_bit = OMAP3430_EN_CORE_SHIFT, + .banks = 2, + .pwrsts_mem_ret = { + [0] = PWRSTS_OFF_RET, /* MEM1RETSTATE */ + [1] = PWRSTS_OFF_RET, /* MEM2RETSTATE */ + }, + .pwrsts_mem_on = { + [0] = PWRSTS_OFF_RET_ON, /* MEM1ONSTATE */ + [1] = PWRSTS_OFF_RET_ON, /* MEM2ONSTATE */ + }, +}; + +/* Another case of bit name collisions between several registers: EN_DSS */ +static struct powerdomain dss_pwrdm = { + .name = "dss_pwrdm", + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP3430), + .prcm_offs = OMAP3430_DSS_MOD, + .dep_bit = OMAP3430_PM_WKDEP_MPU_EN_DSS_SHIFT, + .wkdep_srcs = cam_dss_wkdeps, + .sleepdep_srcs = dss_per_usbhost_sleepdeps, + .pwrsts = PWRSTS_OFF_RET_ON, + .pwrsts_logic_ret = PWRDM_POWER_RET, + .banks = 1, + .pwrsts_mem_ret = { + [0] = PWRDM_POWER_RET, /* MEMRETSTATE */ + }, + .pwrsts_mem_on = { + [0] = PWRDM_POWER_ON, /* MEMONSTATE */ + }, +}; + +static struct powerdomain sgx_pwrdm = { + .name = "sgx_pwrdm", + .prcm_offs = OMAP3430ES2_SGX_MOD, + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP3430ES2), + .wkdep_srcs = gfx_sgx_wkdeps, + .sleepdep_srcs = cam_gfx_sleepdeps, + /* XXX This is accurate for 3430 SGX, but what about GFX? */ + .pwrsts = PWRSTS_OFF_RET_ON, + .pwrsts_logic_ret = PWRDM_POWER_RET, + .banks = 1, + .pwrsts_mem_ret = { + [0] = PWRDM_POWER_RET, /* MEMRETSTATE */ + }, + .pwrsts_mem_on = { + [0] = PWRDM_POWER_ON, /* MEMONSTATE */ + }, +}; + +static struct powerdomain cam_pwrdm = { + .name = "cam_pwrdm", + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP3430), + .prcm_offs = OMAP3430_CAM_MOD, + .wkdep_srcs = cam_dss_wkdeps, + .sleepdep_srcs = cam_gfx_sleepdeps, + .pwrsts = PWRSTS_OFF_RET_ON, + .pwrsts_logic_ret = PWRDM_POWER_RET, + .banks = 1, + .pwrsts_mem_ret = { + [0] = PWRDM_POWER_RET, /* MEMRETSTATE */ + }, + .pwrsts_mem_on = { + [0] = PWRDM_POWER_ON, /* MEMONSTATE */ + }, +}; + +static struct powerdomain per_pwrdm = { + .name = "per_pwrdm", + .prcm_offs = OMAP3430_PER_MOD, + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP3430), + .dep_bit = OMAP3430_EN_PER_SHIFT, + .wkdep_srcs = per_usbhost_wkdeps, + .sleepdep_srcs = dss_per_usbhost_sleepdeps, + .pwrsts = PWRSTS_OFF_RET_ON, + .pwrsts_logic_ret = PWRSTS_OFF_RET, + .banks = 1, + .pwrsts_mem_ret = { + [0] = PWRDM_POWER_RET, /* MEMRETSTATE */ + }, + .pwrsts_mem_on = { + [0] = PWRDM_POWER_ON, /* MEMONSTATE */ + }, +}; + +static struct powerdomain emu_pwrdm = { + .name = "emu_pwrdm", + .prcm_offs = OMAP3430_EMU_MOD, + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP3430), +}; + +static struct powerdomain neon_pwrdm = { + .name = "neon_pwrdm", + .prcm_offs = OMAP3430_NEON_MOD, + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP3430), + .wkdep_srcs = neon_wkdeps, + .pwrsts = PWRSTS_OFF_RET_ON, + .pwrsts_logic_ret = PWRDM_POWER_RET, +}; + +static struct powerdomain usbhost_pwrdm = { + .name = "usbhost_pwrdm", + .prcm_offs = OMAP3430ES2_USBHOST_MOD, + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP3430ES2), + .wkdep_srcs = per_usbhost_wkdeps, + .sleepdep_srcs = dss_per_usbhost_sleepdeps, + .pwrsts = PWRSTS_OFF_RET_ON, + .pwrsts_logic_ret = PWRDM_POWER_RET, + .banks = 1, + .pwrsts_mem_ret = { + [0] = PWRDM_POWER_RET, /* MEMRETSTATE */ + }, + .pwrsts_mem_on = { + [0] = PWRDM_POWER_ON, /* MEMONSTATE */ + }, +}; + +#endif /* CONFIG_ARCH_OMAP34XX */ + + +#endif diff --git a/arch/arm/mach-omap2/prcm-common.h b/arch/arm/mach-omap2/prcm-common.h index 54c32f4..4a32822 100644 --- a/arch/arm/mach-omap2/prcm-common.h +++ b/arch/arm/mach-omap2/prcm-common.h @@ -312,7 +312,8 @@ #define OMAP3430_ST_GPT2 (1 << 3) /* CM_SLEEPDEP_PER, PM_WKDEP_IVA2, PM_WKDEP_MPU, PM_WKDEP_PER shared bits */ -#define OMAP3430_EN_CORE (1 << 0) +#define OMAP3430_EN_CORE_SHIFT 0 +#define OMAP3430_EN_CORE_MASK (1 << 0) #endif diff --git a/arch/arm/mach-omap2/prm-regbits-34xx.h b/arch/arm/mach-omap2/prm-regbits-34xx.h index b4686bc..5b5ecfe 100644 --- a/arch/arm/mach-omap2/prm-regbits-34xx.h +++ b/arch/arm/mach-omap2/prm-regbits-34xx.h @@ -68,7 +68,8 @@ #define OMAP3430_VPINIDLE (1 << 0) /* PM_WKDEP_IVA2, PM_WKDEP_MPU shared bits */ -#define OMAP3430_EN_PER (1 << 7) +#define OMAP3430_EN_PER_SHIFT 7 +#define OMAP3430_EN_PER_MASK (1 << 7) /* PM_PWSTCTRL_IVA2, PM_PWSTCTRL_MPU, PM_PWSTCTRL_CORE shared bits */ #define OMAP3430_MEMORYCHANGE (1 << 3) @@ -77,7 +78,7 @@ #define OMAP3430_LOGICSTATEST (1 << 2) /* PM_PREPWSTST_IVA2, PM_PREPWSTST_CORE shared bits */ -#define OMAP3430_LASTLOGICSTATEENTERED (1 << 2) +#define OMAP3430_LASTLOGICSTATEENTERED (1 << 2) /* * PM_PREPWSTST_IVA2, PM_PREPWSTST_MPU, PM_PREPWSTST_CORE, @@ -278,8 +279,10 @@ #define OMAP3430_EMULATION_MPU_RST (1 << 11) /* PM_WKDEP_MPU specific bits */ -#define OMAP3430_PM_WKDEP_MPU_EN_DSS (1 << 5) -#define OMAP3430_PM_WKDEP_MPU_EN_IVA2 (1 << 2) +#define OMAP3430_PM_WKDEP_MPU_EN_DSS_SHIFT 5 +#define OMAP3430_PM_WKDEP_MPU_EN_DSS_MASK (1 << 5) +#define OMAP3430_PM_WKDEP_MPU_EN_IVA2_SHIFT 2 +#define OMAP3430_PM_WKDEP_MPU_EN_IVA2_MASK (1 << 2) /* PM_EVGENCTRL_MPU */ #define OMAP3430_OFFLOADMODE_SHIFT 3 -- cgit v0.10.2 From d459bfe01f523983a822de8c2d3fe0bd2f2c194e Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Tue, 19 Aug 2008 11:08:43 +0300 Subject: ARM: OMAP2: Clockdomain: Add base OMAP2/3 clockdomain code This patch creates an interface to the clockdomain registers in the PRM/CM modules on OMAP2/3. This interface is intended to be used by PM code, e.g., pm.c; not by device drivers directly. The patch also adds clockdomain usecount tracking. This is intended to be called whenever the first clock in a clockdomain is enabled, or when the last enabled clock in a clockdomain is disabled. If the clockdomain is in software-supervised mode, the code will force-wakeup or force-sleep the clockdomain. If the clockdomain is in hardware-supervised mode, the first clock enable will add sleep and wakeup dependencies on a user-selectable set of parent domains (usually MPU & IVA2), and the disable will remove them. Each clockdomain will be defined in later patches as static structures. The clockdomain structures are linked into a list at boot by clkdm_register(), similar to the OMAP clock code. The patch adds a Kconfig option, CONFIG_OMAP_DEBUG_CLOCKDOMAIN, which when enabled will emit verbose debug messages via pr_debug(). Signed-off-by: Paul Walmsley Signed-off-by: Tony Lindgren diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile index 1001d42..e7cf1b4 100644 --- a/arch/arm/mach-omap2/Makefile +++ b/arch/arm/mach-omap2/Makefile @@ -4,7 +4,8 @@ # Common support obj-y := irq.o id.o io.o memory.o control.o prcm.o clock.o mux.o \ - devices.o serial.o gpmc.o timer-gp.o powerdomain.o + devices.o serial.o gpmc.o timer-gp.o powerdomain.o \ + clockdomain.o obj-$(CONFIG_OMAP_MCBSP) += mcbsp.o diff --git a/arch/arm/mach-omap2/clockdomain.c b/arch/arm/mach-omap2/clockdomain.c new file mode 100644 index 0000000..f867d8f --- /dev/null +++ b/arch/arm/mach-omap2/clockdomain.c @@ -0,0 +1,603 @@ +/* + * OMAP2/3 clockdomain framework functions + * + * Copyright (C) 2008 Texas Instruments, Inc. + * Copyright (C) 2008 Nokia Corporation + * + * Written by Paul Walmsley and Jouni Högander + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifdef CONFIG_OMAP_DEBUG_CLOCKDOMAIN +# define DEBUG +#endif + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include + +#include "prm.h" +#include "prm-regbits-24xx.h" +#include "cm.h" + +#include +#include + +/* clkdm_list contains all registered struct clockdomains */ +static LIST_HEAD(clkdm_list); + +/* clkdm_mutex protects clkdm_list add and del ops */ +static DEFINE_MUTEX(clkdm_mutex); + +/* array of powerdomain deps to be added/removed when clkdm in hwsup mode */ +static struct clkdm_pwrdm_autodep *autodeps; + + +/* Private functions */ + +/* + * _autodep_lookup - resolve autodep pwrdm names to pwrdm pointers; store + * @autodep: struct clkdm_pwrdm_autodep * to resolve + * + * Resolve autodep powerdomain names to powerdomain pointers via + * pwrdm_lookup() and store the pointers in the autodep structure. An + * "autodep" is a powerdomain sleep/wakeup dependency that is + * automatically added and removed whenever clocks in the associated + * clockdomain are enabled or disabled (respectively) when the + * clockdomain is in hardware-supervised mode. Meant to be called + * once at clockdomain layer initialization, since these should remain + * fixed for a particular architecture. No return value. + */ +static void _autodep_lookup(struct clkdm_pwrdm_autodep *autodep) +{ + struct powerdomain *pwrdm; + + if (!autodep) + return; + + if (!omap_chip_is(autodep->omap_chip)) + return; + + pwrdm = pwrdm_lookup(autodep->pwrdm_name); + if (!pwrdm) { + pr_debug("clockdomain: _autodep_lookup: powerdomain %s " + "does not exist\n", autodep->pwrdm_name); + WARN_ON(1); + return; + } + autodep->pwrdm = pwrdm; + + return; +} + +/* + * _clkdm_add_autodeps - add auto sleepdeps/wkdeps to clkdm upon clock enable + * @clkdm: struct clockdomain * + * + * Add the "autodep" sleep & wakeup dependencies to clockdomain 'clkdm' + * in hardware-supervised mode. Meant to be called from clock framework + * when a clock inside clockdomain 'clkdm' is enabled. No return value. + */ +static void _clkdm_add_autodeps(struct clockdomain *clkdm) +{ + struct clkdm_pwrdm_autodep *autodep; + + for (autodep = autodeps; autodep->pwrdm_name; autodep++) { + if (!autodep->pwrdm) + continue; + + pr_debug("clockdomain: adding %s sleepdep/wkdep for " + "pwrdm %s\n", autodep->pwrdm_name, + clkdm->pwrdm->name); + + pwrdm_add_sleepdep(clkdm->pwrdm, autodep->pwrdm); + pwrdm_add_wkdep(clkdm->pwrdm, autodep->pwrdm); + } +} + +/* + * _clkdm_add_autodeps - remove auto sleepdeps/wkdeps from clkdm + * @clkdm: struct clockdomain * + * + * Remove the "autodep" sleep & wakeup dependencies from clockdomain 'clkdm' + * in hardware-supervised mode. Meant to be called from clock framework + * when a clock inside clockdomain 'clkdm' is disabled. No return value. + */ +static void _clkdm_del_autodeps(struct clockdomain *clkdm) +{ + struct clkdm_pwrdm_autodep *autodep; + + for (autodep = autodeps; autodep->pwrdm_name; autodep++) { + if (!autodep->pwrdm) + continue; + + pr_debug("clockdomain: removing %s sleepdep/wkdep for " + "pwrdm %s\n", autodep->pwrdm_name, + clkdm->pwrdm->name); + + pwrdm_del_sleepdep(clkdm->pwrdm, autodep->pwrdm); + pwrdm_del_wkdep(clkdm->pwrdm, autodep->pwrdm); + } +} + + +static struct clockdomain *_clkdm_lookup(const char *name) +{ + struct clockdomain *clkdm, *temp_clkdm; + + if (!name) + return NULL; + + clkdm = NULL; + + list_for_each_entry(temp_clkdm, &clkdm_list, node) { + if (!strcmp(name, temp_clkdm->name)) { + clkdm = temp_clkdm; + break; + } + } + + return clkdm; +} + + +/* Public functions */ + +/** + * clkdm_init - set up the clockdomain layer + * @clkdms: optional pointer to an array of clockdomains to register + * @init_autodeps: optional pointer to an array of autodeps to register + * + * Set up internal state. If a pointer to an array of clockdomains + * was supplied, loop through the list of clockdomains, register all + * that are available on the current platform. Similarly, if a + * pointer to an array of clockdomain-powerdomain autodependencies was + * provided, register those. No return value. + */ +void clkdm_init(struct clockdomain **clkdms, + struct clkdm_pwrdm_autodep *init_autodeps) +{ + struct clockdomain **c = NULL; + struct clkdm_pwrdm_autodep *autodep = NULL; + + if (clkdms) + for (c = clkdms; *c; c++) + clkdm_register(*c); + + autodeps = init_autodeps; + if (autodeps) + for (autodep = autodeps; autodep->pwrdm_name; autodep++) + _autodep_lookup(autodep); +} + +/** + * clkdm_register - register a clockdomain + * @clkdm: struct clockdomain * to register + * + * Adds a clockdomain to the internal clockdomain list. + * Returns -EINVAL if given a null pointer, -EEXIST if a clockdomain is + * already registered by the provided name, or 0 upon success. + */ +int clkdm_register(struct clockdomain *clkdm) +{ + int ret = -EINVAL; + struct powerdomain *pwrdm; + + if (!clkdm || !clkdm->name) + return -EINVAL; + + if (!omap_chip_is(clkdm->omap_chip)) + return -EINVAL; + + pwrdm = pwrdm_lookup(clkdm->pwrdm_name); + if (!pwrdm) { + pr_debug("clockdomain: clkdm_register %s: powerdomain %s " + "does not exist\n", clkdm->name, clkdm->pwrdm_name); + return -EINVAL; + } + clkdm->pwrdm = pwrdm; + + mutex_lock(&clkdm_mutex); + /* Verify that the clockdomain is not already registered */ + if (_clkdm_lookup(clkdm->name)) { + ret = -EEXIST; + goto cr_unlock; + }; + + list_add(&clkdm->node, &clkdm_list); + + pr_debug("clockdomain: registered %s\n", clkdm->name); + ret = 0; + +cr_unlock: + mutex_unlock(&clkdm_mutex); + + return ret; +} + +/** + * clkdm_unregister - unregister a clockdomain + * @clkdm: struct clockdomain * to unregister + * + * Removes a clockdomain from the internal clockdomain list. Returns + * -EINVAL if clkdm argument is NULL. + */ +int clkdm_unregister(struct clockdomain *clkdm) +{ + if (!clkdm) + return -EINVAL; + + mutex_lock(&clkdm_mutex); + list_del(&clkdm->node); + mutex_unlock(&clkdm_mutex); + + pr_debug("clockdomain: unregistered %s\n", clkdm->name); + + return 0; +} + +/** + * clkdm_lookup - look up a clockdomain by name, return a pointer + * @name: name of clockdomain + * + * Find a registered clockdomain by its name. Returns a pointer to the + * struct clockdomain if found, or NULL otherwise. + */ +struct clockdomain *clkdm_lookup(const char *name) +{ + struct clockdomain *clkdm, *temp_clkdm; + + if (!name) + return NULL; + + clkdm = NULL; + + mutex_lock(&clkdm_mutex); + list_for_each_entry(temp_clkdm, &clkdm_list, node) { + if (!strcmp(name, temp_clkdm->name)) { + clkdm = temp_clkdm; + break; + } + } + mutex_unlock(&clkdm_mutex); + + return clkdm; +} + +/** + * clkdm_for_each - call function on each registered clockdomain + * @fn: callback function * + * + * Call the supplied function for each registered clockdomain. + * The callback function can return anything but 0 to bail + * out early from the iterator. The callback function is called with + * the clkdm_mutex held, so no clockdomain structure manipulation + * functions should be called from the callback, although hardware + * clockdomain control functions are fine. Returns the last return + * value of the callback function, which should be 0 for success or + * anything else to indicate failure; or -EINVAL if the function pointer + * is null. + */ +int clkdm_for_each(int (*fn)(struct clockdomain *clkdm)) +{ + struct clockdomain *clkdm; + int ret = 0; + + if (!fn) + return -EINVAL; + + mutex_lock(&clkdm_mutex); + list_for_each_entry(clkdm, &clkdm_list, node) { + ret = (*fn)(clkdm); + if (ret) + break; + } + mutex_unlock(&clkdm_mutex); + + return ret; +} + + +/* Hardware clockdomain control */ + +/** + * omap2_clkdm_clktrctrl_read - read the clkdm's current state transition mode + * @clk: struct clk * of a clockdomain + * + * Return the clockdomain's current state transition mode from the + * corresponding domain CM_CLKSTCTRL register. Returns -EINVAL if clk + * is NULL or the current mode upon success. + */ +static int omap2_clkdm_clktrctrl_read(struct clockdomain *clkdm) +{ + u32 v; + + if (!clkdm) + return -EINVAL; + + v = cm_read_mod_reg(clkdm->pwrdm->prcm_offs, CM_CLKSTCTRL); + v &= clkdm->clktrctrl_mask; + v >>= __ffs(clkdm->clktrctrl_mask); + + return v; +} + +/** + * omap2_clkdm_sleep - force clockdomain sleep transition + * @clkdm: struct clockdomain * + * + * Instruct the CM to force a sleep transition on the specified + * clockdomain 'clkdm'. Returns -EINVAL if clk is NULL or if + * clockdomain does not support software-initiated sleep; 0 upon + * success. + */ +int omap2_clkdm_sleep(struct clockdomain *clkdm) +{ + if (!clkdm) + return -EINVAL; + + if (!(clkdm->flags & CLKDM_CAN_FORCE_SLEEP)) { + pr_debug("clockdomain: %s does not support forcing " + "sleep via software\n", clkdm->name); + return -EINVAL; + } + + pr_debug("clockdomain: forcing sleep on %s\n", clkdm->name); + + if (cpu_is_omap24xx()) { + + cm_set_mod_reg_bits(OMAP24XX_FORCESTATE, + clkdm->pwrdm->prcm_offs, PM_PWSTCTRL); + + } else if (cpu_is_omap34xx()) { + + u32 v = (OMAP34XX_CLKSTCTRL_FORCE_SLEEP << + __ffs(clkdm->clktrctrl_mask)); + + cm_rmw_mod_reg_bits(clkdm->clktrctrl_mask, v, + clkdm->pwrdm->prcm_offs, CM_CLKSTCTRL); + + } else { + BUG(); + }; + + return 0; +} + +/** + * omap2_clkdm_wakeup - force clockdomain wakeup transition + * @clkdm: struct clockdomain * + * + * Instruct the CM to force a wakeup transition on the specified + * clockdomain 'clkdm'. Returns -EINVAL if clkdm is NULL or if the + * clockdomain does not support software-controlled wakeup; 0 upon + * success. + */ +int omap2_clkdm_wakeup(struct clockdomain *clkdm) +{ + if (!clkdm) + return -EINVAL; + + if (!(clkdm->flags & CLKDM_CAN_FORCE_WAKEUP)) { + pr_debug("clockdomain: %s does not support forcing " + "wakeup via software\n", clkdm->name); + return -EINVAL; + } + + pr_debug("clockdomain: forcing wakeup on %s\n", clkdm->name); + + if (cpu_is_omap24xx()) { + + cm_clear_mod_reg_bits(OMAP24XX_FORCESTATE, + clkdm->pwrdm->prcm_offs, PM_PWSTCTRL); + + } else if (cpu_is_omap34xx()) { + + u32 v = (OMAP34XX_CLKSTCTRL_FORCE_WAKEUP << + __ffs(clkdm->clktrctrl_mask)); + + cm_rmw_mod_reg_bits(clkdm->clktrctrl_mask, v, + clkdm->pwrdm->prcm_offs, CM_CLKSTCTRL); + + } else { + BUG(); + }; + + return 0; +} + +/** + * omap2_clkdm_allow_idle - enable hwsup idle transitions for clkdm + * @clkdm: struct clockdomain * + * + * Allow the hardware to automatically switch the clockdomain into + * active or idle states, as needed by downstream clocks. If the + * clockdomain has any downstream clocks enabled in the clock + * framework, wkdep/sleepdep autodependencies are added; this is so + * device drivers can read and write to the device. No return value. + */ +void omap2_clkdm_allow_idle(struct clockdomain *clkdm) +{ + u32 v; + + if (!clkdm) + return; + + if (!(clkdm->flags & CLKDM_CAN_ENABLE_AUTO)) { + pr_debug("clock: automatic idle transitions cannot be enabled " + "on clockdomain %s\n", clkdm->name); + return; + } + + pr_debug("clockdomain: enabling automatic idle transitions for %s\n", + clkdm->name); + + if (atomic_read(&clkdm->usecount) > 0) + _clkdm_add_autodeps(clkdm); + + if (cpu_is_omap24xx()) + v = OMAP24XX_CLKSTCTRL_ENABLE_AUTO; + else if (cpu_is_omap34xx()) + v = OMAP34XX_CLKSTCTRL_ENABLE_AUTO; + else + BUG(); + + + cm_rmw_mod_reg_bits(clkdm->clktrctrl_mask, + v << __ffs(clkdm->clktrctrl_mask), + clkdm->pwrdm->prcm_offs, + CM_CLKSTCTRL); +} + +/** + * omap2_clkdm_deny_idle - disable hwsup idle transitions for clkdm + * @clkdm: struct clockdomain * + * + * Prevent the hardware from automatically switching the clockdomain + * into inactive or idle states. If the clockdomain has downstream + * clocks enabled in the clock framework, wkdep/sleepdep + * autodependencies are removed. No return value. + */ +void omap2_clkdm_deny_idle(struct clockdomain *clkdm) +{ + u32 v; + + if (!clkdm) + return; + + if (!(clkdm->flags & CLKDM_CAN_DISABLE_AUTO)) { + pr_debug("clockdomain: automatic idle transitions cannot be " + "disabled on %s\n", clkdm->name); + return; + } + + pr_debug("clockdomain: disabling automatic idle transitions for %s\n", + clkdm->name); + + if (cpu_is_omap24xx()) + v = OMAP24XX_CLKSTCTRL_DISABLE_AUTO; + else if (cpu_is_omap34xx()) + v = OMAP34XX_CLKSTCTRL_DISABLE_AUTO; + else + BUG(); + + cm_rmw_mod_reg_bits(clkdm->clktrctrl_mask, + v << __ffs(clkdm->clktrctrl_mask), + clkdm->pwrdm->prcm_offs, CM_CLKSTCTRL); + + if (atomic_read(&clkdm->usecount) > 0) + _clkdm_del_autodeps(clkdm); +} + + +/* Clockdomain-to-clock framework interface code */ + +/** + * omap2_clkdm_clk_enable - add an enabled downstream clock to this clkdm + * @clkdm: struct clockdomain * + * @clk: struct clk * of the enabled downstream clock + * + * Increment the usecount of this clockdomain 'clkdm' and ensure that + * it is awake. Intended to be called by clk_enable() code. If the + * clockdomain is in software-supervised idle mode, force the + * clockdomain to wake. If the clockdomain is in hardware-supervised + * idle mode, add clkdm-pwrdm autodependencies, to ensure that devices + * in the clockdomain can be read from/written to by on-chip processors. + * Returns -EINVAL if passed null pointers; returns 0 upon success or + * if the clockdomain is in hwsup idle mode. + */ +int omap2_clkdm_clk_enable(struct clockdomain *clkdm, struct clk *clk) +{ + int v; + + /* + * XXX Rewrite this code to maintain a list of enabled + * downstream clocks for debugging purposes? + */ + + if (!clkdm || !clk) + return -EINVAL; + + if (atomic_inc_return(&clkdm->usecount) > 1) + return 0; + + /* Clockdomain now has one enabled downstream clock */ + + pr_debug("clockdomain: clkdm %s: clk %s now enabled\n", clkdm->name, + clk->name); + + v = omap2_clkdm_clktrctrl_read(clkdm); + + if ((cpu_is_omap34xx() && v == OMAP34XX_CLKSTCTRL_ENABLE_AUTO) || + (cpu_is_omap24xx() && v == OMAP24XX_CLKSTCTRL_ENABLE_AUTO)) + _clkdm_add_autodeps(clkdm); + else + omap2_clkdm_wakeup(clkdm); + + return 0; +} + +/** + * omap2_clkdm_clk_disable - remove an enabled downstream clock from this clkdm + * @clkdm: struct clockdomain * + * @clk: struct clk * of the disabled downstream clock + * + * Decrement the usecount of this clockdomain 'clkdm'. Intended to be + * called by clk_disable() code. If the usecount goes to 0, put the + * clockdomain to sleep (software-supervised mode) or remove the + * clkdm-pwrdm autodependencies (hardware-supervised mode). Returns + * -EINVAL if passed null pointers; -ERANGE if the clkdm usecount + * underflows and debugging is enabled; or returns 0 upon success or + * if the clockdomain is in hwsup idle mode. + */ +int omap2_clkdm_clk_disable(struct clockdomain *clkdm, struct clk *clk) +{ + int v; + + /* + * XXX Rewrite this code to maintain a list of enabled + * downstream clocks for debugging purposes? + */ + + if (!clkdm || !clk) + return -EINVAL; + +#ifdef DEBUG + if (atomic_read(&clkdm->usecount) == 0) { + WARN_ON(1); /* underflow */ + return -ERANGE; + } +#endif + + if (atomic_dec_return(&clkdm->usecount) > 0) + return 0; + + /* All downstream clocks of this clockdomain are now disabled */ + + pr_debug("clockdomain: clkdm %s: clk %s now disabled\n", clkdm->name, + clk->name); + + v = omap2_clkdm_clktrctrl_read(clkdm); + + if ((cpu_is_omap34xx() && v == OMAP34XX_CLKSTCTRL_ENABLE_AUTO) || + (cpu_is_omap24xx() && v == OMAP24XX_CLKSTCTRL_ENABLE_AUTO)) + _clkdm_del_autodeps(clkdm); + else + omap2_clkdm_sleep(clkdm); + + return 0; +} + diff --git a/arch/arm/plat-omap/Kconfig b/arch/arm/plat-omap/Kconfig index e815fa3..ef62bf2 100644 --- a/arch/arm/plat-omap/Kconfig +++ b/arch/arm/plat-omap/Kconfig @@ -41,6 +41,18 @@ config OMAP_DEBUG_POWERDOMAIN for every powerdomain register write. However, the extra detail costs some memory. +config OMAP_DEBUG_CLOCKDOMAIN + bool "Emit debug messages from clockdomain layer" + depends on ARCH_OMAP2 || ARCH_OMAP3 + default n + help + Say Y here if you want to compile in clockdomain layer + debugging messages for OMAP2/3. These messages can + provide more detail as to why some clockdomain calls + may be failing, and will also emit a descriptive message + for every clockdomain register write. However, the + extra detail costs some memory. + config OMAP_RESET_CLOCKS bool "Reset unused clocks during boot" depends on ARCH_OMAP diff --git a/arch/arm/plat-omap/include/mach/clockdomain.h b/arch/arm/plat-omap/include/mach/clockdomain.h new file mode 100644 index 0000000..1f51f01 --- /dev/null +++ b/arch/arm/plat-omap/include/mach/clockdomain.h @@ -0,0 +1,106 @@ +/* + * linux/include/asm-arm/arch-omap/clockdomain.h + * + * OMAP2/3 clockdomain framework functions + * + * Copyright (C) 2008 Texas Instruments, Inc. + * Copyright (C) 2008 Nokia Corporation + * + * Written by Paul Walmsley + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __ASM_ARM_ARCH_OMAP_CLOCKDOMAIN_H +#define __ASM_ARM_ARCH_OMAP_CLOCKDOMAIN_H + +#include +#include +#include + +/* Clockdomain capability flags */ +#define CLKDM_CAN_FORCE_SLEEP (1 << 0) +#define CLKDM_CAN_FORCE_WAKEUP (1 << 1) +#define CLKDM_CAN_ENABLE_AUTO (1 << 2) +#define CLKDM_CAN_DISABLE_AUTO (1 << 3) + +#define CLKDM_CAN_HWSUP (CLKDM_CAN_ENABLE_AUTO | CLKDM_CAN_DISABLE_AUTO) +#define CLKDM_CAN_SWSUP (CLKDM_CAN_FORCE_SLEEP | CLKDM_CAN_FORCE_WAKEUP) +#define CLKDM_CAN_HWSUP_SWSUP (CLKDM_CAN_SWSUP | CLKDM_CAN_HWSUP) + +/* OMAP24XX CM_CLKSTCTRL_*.AUTOSTATE_* register bit values */ +#define OMAP24XX_CLKSTCTRL_DISABLE_AUTO 0x0 +#define OMAP24XX_CLKSTCTRL_ENABLE_AUTO 0x1 + +/* OMAP3XXX CM_CLKSTCTRL_*.CLKTRCTRL_* register bit values */ +#define OMAP34XX_CLKSTCTRL_DISABLE_AUTO 0x0 +#define OMAP34XX_CLKSTCTRL_FORCE_SLEEP 0x1 +#define OMAP34XX_CLKSTCTRL_FORCE_WAKEUP 0x2 +#define OMAP34XX_CLKSTCTRL_ENABLE_AUTO 0x3 + +/* + * struct clkdm_pwrdm_autodep - a powerdomain that should have wkdeps + * and sleepdeps added when a powerdomain should stay active in hwsup mode; + * and conversely, removed when the powerdomain should be allowed to go + * inactive in hwsup mode. + */ +struct clkdm_pwrdm_autodep { + + /* Name of the powerdomain to add a wkdep/sleepdep on */ + const char *pwrdm_name; + + /* Powerdomain pointer (looked up at clkdm_init() time) */ + struct powerdomain *pwrdm; + + /* OMAP chip types that this clockdomain dep is valid on */ + const struct omap_chip_id omap_chip; + +}; + +struct clockdomain { + + /* Clockdomain name */ + const char *name; + + /* Powerdomain enclosing this clockdomain */ + const char *pwrdm_name; + + /* CLKTRCTRL/AUTOSTATE field mask in CM_CLKSTCTRL reg */ + const u16 clktrctrl_mask; + + /* Clockdomain capability flags */ + const u8 flags; + + /* OMAP chip types that this clockdomain is valid on */ + const struct omap_chip_id omap_chip; + + /* Usecount tracking */ + atomic_t usecount; + + /* Powerdomain pointer assigned at clkdm_register() */ + struct powerdomain *pwrdm; + + struct list_head node; + +}; + +void clkdm_init(struct clockdomain **clkdms, struct clkdm_pwrdm_autodep *autodeps); +int clkdm_register(struct clockdomain *clkdm); +int clkdm_unregister(struct clockdomain *clkdm); +struct clockdomain *clkdm_lookup(const char *name); + +int clkdm_for_each(int (*fn)(struct clockdomain *clkdm)); +struct powerdomain *clkdm_get_pwrdm(struct clockdomain *clkdm); + +void omap2_clkdm_allow_idle(struct clockdomain *clkdm); +void omap2_clkdm_deny_idle(struct clockdomain *clkdm); + +int omap2_clkdm_wakeup(struct clockdomain *clkdm); +int omap2_clkdm_sleep(struct clockdomain *clkdm); + +int omap2_clkdm_clk_enable(struct clockdomain *clkdm, struct clk *clk); +int omap2_clkdm_clk_disable(struct clockdomain *clkdm, struct clk *clk); + +#endif -- cgit v0.10.2 From 8420bb13630032097be911a039cb64b5f62c01da Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Tue, 19 Aug 2008 11:08:44 +0300 Subject: ARM: OMAP2: Clockdomain: Connect clockdomain code to powerdomain code Thie patch adds code to the powerdomain layer to track the clockdomains associated with each powerdomain. It also modifies the clockdomain code to register clockdomains with their corresponding powerdomain when the clockdomain is registered. Signed-off-by: Paul Walmsley Signed-off-by: Tony Lindgren diff --git a/arch/arm/mach-omap2/clockdomain.c b/arch/arm/mach-omap2/clockdomain.c index f867d8f..b6ff5aa 100644 --- a/arch/arm/mach-omap2/clockdomain.c +++ b/arch/arm/mach-omap2/clockdomain.c @@ -219,6 +219,8 @@ int clkdm_register(struct clockdomain *clkdm) list_add(&clkdm->node, &clkdm_list); + pwrdm_add_clkdm(pwrdm, clkdm); + pr_debug("clockdomain: registered %s\n", clkdm->name); ret = 0; @@ -240,6 +242,8 @@ int clkdm_unregister(struct clockdomain *clkdm) if (!clkdm) return -EINVAL; + pwrdm_del_clkdm(clkdm->pwrdm, clkdm); + mutex_lock(&clkdm_mutex); list_del(&clkdm->node); mutex_unlock(&clkdm_mutex); diff --git a/arch/arm/mach-omap2/powerdomain.c b/arch/arm/mach-omap2/powerdomain.c index 1ec0038..9a80349 100644 --- a/arch/arm/mach-omap2/powerdomain.c +++ b/arch/arm/mach-omap2/powerdomain.c @@ -33,6 +33,7 @@ #include #include +#include /* pwrdm_list contains all registered struct powerdomains */ static LIST_HEAD(pwrdm_list); @@ -237,6 +238,141 @@ int pwrdm_for_each(int (*fn)(struct powerdomain *pwrdm)) } /** + * pwrdm_add_clkdm - add a clockdomain to a powerdomain + * @pwrdm: struct powerdomain * to add the clockdomain to + * @clkdm: struct clockdomain * to associate with a powerdomain + * + * Associate the clockdomain 'clkdm' with a powerdomain 'pwrdm'. This + * enables the use of pwrdm_for_each_clkdm(). Returns -EINVAL if + * presented with invalid pointers; -ENOMEM if memory could not be allocated; + * or 0 upon success. + */ +int pwrdm_add_clkdm(struct powerdomain *pwrdm, struct clockdomain *clkdm) +{ + unsigned long flags; + int i; + int ret = -EINVAL; + + if (!pwrdm || !clkdm) + return -EINVAL; + + pr_debug("powerdomain: associating clockdomain %s with powerdomain " + "%s\n", clkdm->name, pwrdm->name); + + write_lock_irqsave(&pwrdm_rwlock, flags); + + for (i = 0; i < PWRDM_MAX_CLKDMS; i++) { + if (!pwrdm->pwrdm_clkdms[i]) + break; +#ifdef DEBUG + if (pwrdm->pwrdm_clkdms[i] == clkdm) { + ret = -EINVAL; + goto pac_exit; + } +#endif + } + + if (i == PWRDM_MAX_CLKDMS) { + pr_debug("powerdomain: increase PWRDM_MAX_CLKDMS for " + "pwrdm %s clkdm %s\n", pwrdm->name, clkdm->name); + WARN_ON(1); + ret = -ENOMEM; + goto pac_exit; + } + + pwrdm->pwrdm_clkdms[i] = clkdm; + + ret = 0; + +pac_exit: + write_unlock_irqrestore(&pwrdm_rwlock, flags); + + return ret; +} + +/** + * pwrdm_del_clkdm - remove a clockdomain from a powerdomain + * @pwrdm: struct powerdomain * to add the clockdomain to + * @clkdm: struct clockdomain * to associate with a powerdomain + * + * Dissociate the clockdomain 'clkdm' from the powerdomain + * 'pwrdm'. Returns -EINVAL if presented with invalid pointers; + * -ENOENT if the clkdm was not associated with the powerdomain, or 0 + * upon success. + */ +int pwrdm_del_clkdm(struct powerdomain *pwrdm, struct clockdomain *clkdm) +{ + unsigned long flags; + int ret = -EINVAL; + int i; + + if (!pwrdm || !clkdm) + return -EINVAL; + + pr_debug("powerdomain: dissociating clockdomain %s from powerdomain " + "%s\n", clkdm->name, pwrdm->name); + + write_lock_irqsave(&pwrdm_rwlock, flags); + + for (i = 0; i < PWRDM_MAX_CLKDMS; i++) + if (pwrdm->pwrdm_clkdms[i] == clkdm) + break; + + if (i == PWRDM_MAX_CLKDMS) { + pr_debug("powerdomain: clkdm %s not associated with pwrdm " + "%s ?!\n", clkdm->name, pwrdm->name); + ret = -ENOENT; + goto pdc_exit; + } + + pwrdm->pwrdm_clkdms[i] = NULL; + + ret = 0; + +pdc_exit: + write_unlock_irqrestore(&pwrdm_rwlock, flags); + + return ret; +} + +/** + * pwrdm_for_each_clkdm - call function on each clkdm in a pwrdm + * @pwrdm: struct powerdomain * to iterate over + * @fn: callback function * + * + * Call the supplied function for each clockdomain in the powerdomain + * 'pwrdm'. The callback function can return anything but 0 to bail + * out early from the iterator. The callback function is called with + * the pwrdm_rwlock held for reading, so no powerdomain structure + * manipulation functions should be called from the callback, although + * hardware powerdomain control functions are fine. Returns -EINVAL + * if presented with invalid pointers; or passes along the last return + * value of the callback function, which should be 0 for success or + * anything else to indicate failure. + */ +int pwrdm_for_each_clkdm(struct powerdomain *pwrdm, + int (*fn)(struct powerdomain *pwrdm, + struct clockdomain *clkdm)) +{ + unsigned long flags; + int ret = 0; + int i; + + if (!fn) + return -EINVAL; + + read_lock_irqsave(&pwrdm_rwlock, flags); + + for (i = 0; i < PWRDM_MAX_CLKDMS && !ret; i++) + ret = (*fn)(pwrdm, pwrdm->pwrdm_clkdms[i]); + + read_unlock_irqrestore(&pwrdm_rwlock, flags); + + return ret; +} + + +/** * pwrdm_add_wkdep - add a wakeup dependency from pwrdm2 to pwrdm1 * @pwrdm1: wake this struct powerdomain * up (dependent) * @pwrdm2: when this struct powerdomain * wakes up (source) diff --git a/arch/arm/plat-omap/include/mach/powerdomain.h b/arch/arm/plat-omap/include/mach/powerdomain.h index c8f52c6..5fa666f 100644 --- a/arch/arm/plat-omap/include/mach/powerdomain.h +++ b/arch/arm/plat-omap/include/mach/powerdomain.h @@ -44,9 +44,16 @@ */ #define PWRDM_MAX_MEM_BANKS 4 +/* + * Maximum number of clockdomains that can be associated with a powerdomain. + * CORE powerdomain is probably the worst case. + */ +#define PWRDM_MAX_CLKDMS 3 + /* XXX A completely arbitrary number. What is reasonable here? */ #define PWRDM_TRANSITION_BAILOUT 100000 +struct clockdomain; struct powerdomain; /* Encodes dependencies between powerdomains - statically defined */ @@ -98,6 +105,9 @@ struct powerdomain { /* Possible memory bank pwrstates when pwrdm is ON */ const u8 pwrsts_mem_on[PWRDM_MAX_MEM_BANKS]; + /* Clockdomains in this powerdomain */ + struct clockdomain *pwrdm_clkdms[PWRDM_MAX_CLKDMS]; + struct list_head node; }; @@ -111,6 +121,12 @@ struct powerdomain *pwrdm_lookup(const char *name); int pwrdm_for_each(int (*fn)(struct powerdomain *pwrdm)); +int pwrdm_add_clkdm(struct powerdomain *pwrdm, struct clockdomain *clkdm); +int pwrdm_del_clkdm(struct powerdomain *pwrdm, struct clockdomain *clkdm); +int pwrdm_for_each_clkdm(struct powerdomain *pwrdm, + int (*fn)(struct powerdomain *pwrdm, + struct clockdomain *clkdm)); + int pwrdm_add_wkdep(struct powerdomain *pwrdm1, struct powerdomain *pwrdm2); int pwrdm_del_wkdep(struct powerdomain *pwrdm1, struct powerdomain *pwrdm2); int pwrdm_read_wkdep(struct powerdomain *pwrdm1, struct powerdomain *pwrdm2); -- cgit v0.10.2 From 801954d3debb87af9fa7f9187cb1100175d76ac7 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Tue, 19 Aug 2008 11:08:44 +0300 Subject: ARM: OMAP2: Clockdomain: Encode OMAP2/3 clockdomains Add clockdomain definitions for OMAP24xx and OMAP34xx chips. Signed-off-by: Paul Walmsley Signed-off-by: Tony Lindgren diff --git a/arch/arm/mach-omap2/clockdomains.h b/arch/arm/mach-omap2/clockdomains.h new file mode 100644 index 0000000..a276320 --- /dev/null +++ b/arch/arm/mach-omap2/clockdomains.h @@ -0,0 +1,298 @@ +/* + * OMAP2/3 clockdomains + * + * Copyright (C) 2008 Texas Instruments, Inc. + * Copyright (C) 2008 Nokia Corporation + * + * Written by Paul Walmsley + */ + +#ifndef __ARCH_ARM_MACH_OMAP2_CLOCKDOMAINS_H +#define __ARCH_ARM_MACH_OMAP2_CLOCKDOMAINS_H + +#include + +/* + * OMAP2/3-common clockdomains + */ + +/* This is an implicit clockdomain - it is never defined as such in TRM */ +static struct clockdomain wkup_clkdm = { + .name = "wkup_clkdm", + .pwrdm_name = "wkup_pwrdm", + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP24XX | CHIP_IS_OMAP3430), +}; + +/* + * 2420-only clockdomains + */ + +#if defined(CONFIG_ARCH_OMAP2420) + +static struct clockdomain mpu_2420_clkdm = { + .name = "mpu_clkdm", + .pwrdm_name = "mpu_pwrdm", + .flags = CLKDM_CAN_HWSUP, + .clktrctrl_mask = OMAP24XX_AUTOSTATE_MPU_MASK, + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP2420), +}; + +static struct clockdomain iva1_2420_clkdm = { + .name = "iva1_clkdm", + .pwrdm_name = "dsp_pwrdm", + .flags = CLKDM_CAN_HWSUP_SWSUP, + .clktrctrl_mask = OMAP2420_AUTOSTATE_IVA_MASK, + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP2420), +}; + +#endif /* CONFIG_ARCH_OMAP2420 */ + + +/* + * 2430-only clockdomains + */ + +#if defined(CONFIG_ARCH_OMAP2430) + +static struct clockdomain mpu_2430_clkdm = { + .name = "mpu_clkdm", + .pwrdm_name = "mpu_pwrdm", + .flags = CLKDM_CAN_HWSUP_SWSUP, + .clktrctrl_mask = OMAP24XX_AUTOSTATE_MPU_MASK, + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP2430), +}; + +static struct clockdomain mdm_clkdm = { + .name = "mdm_clkdm", + .pwrdm_name = "mdm_pwrdm", + .flags = CLKDM_CAN_HWSUP_SWSUP, + .clktrctrl_mask = OMAP2430_AUTOSTATE_MDM_MASK, + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP2430), +}; + +#endif /* CONFIG_ARCH_OMAP2430 */ + + +/* + * 24XX-only clockdomains + */ + +#if defined(CONFIG_ARCH_OMAP24XX) + +static struct clockdomain dsp_clkdm = { + .name = "dsp_clkdm", + .pwrdm_name = "dsp_pwrdm", + .flags = CLKDM_CAN_HWSUP_SWSUP, + .clktrctrl_mask = OMAP24XX_AUTOSTATE_DSP_MASK, + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP24XX), +}; + +static struct clockdomain gfx_24xx_clkdm = { + .name = "gfx_clkdm", + .pwrdm_name = "gfx_pwrdm", + .flags = CLKDM_CAN_HWSUP_SWSUP, + .clktrctrl_mask = OMAP24XX_AUTOSTATE_GFX_MASK, + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP24XX), +}; + +static struct clockdomain core_l3_24xx_clkdm = { + .name = "core_l3_clkdm", + .pwrdm_name = "core_pwrdm", + .flags = CLKDM_CAN_HWSUP, + .clktrctrl_mask = OMAP24XX_AUTOSTATE_L3_MASK, + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP24XX), +}; + +static struct clockdomain core_l4_24xx_clkdm = { + .name = "core_l4_clkdm", + .pwrdm_name = "core_pwrdm", + .flags = CLKDM_CAN_HWSUP, + .clktrctrl_mask = OMAP24XX_AUTOSTATE_L4_MASK, + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP24XX), +}; + +static struct clockdomain dss_24xx_clkdm = { + .name = "dss_clkdm", + .pwrdm_name = "core_pwrdm", + .flags = CLKDM_CAN_HWSUP, + .clktrctrl_mask = OMAP24XX_AUTOSTATE_DSS_MASK, + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP24XX), +}; + +#endif /* CONFIG_ARCH_OMAP24XX */ + + +/* + * 34xx clockdomains + */ + +#if defined(CONFIG_ARCH_OMAP34XX) + +static struct clockdomain mpu_34xx_clkdm = { + .name = "mpu_clkdm", + .pwrdm_name = "mpu_pwrdm", + .flags = CLKDM_CAN_HWSUP | CLKDM_CAN_FORCE_WAKEUP, + .clktrctrl_mask = OMAP3430_CLKTRCTRL_MPU_MASK, + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP3430), +}; + +static struct clockdomain neon_clkdm = { + .name = "neon_clkdm", + .pwrdm_name = "neon_pwrdm", + .flags = CLKDM_CAN_HWSUP_SWSUP, + .clktrctrl_mask = OMAP3430_CLKTRCTRL_NEON_MASK, + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP3430), +}; + +static struct clockdomain iva2_clkdm = { + .name = "iva2_clkdm", + .pwrdm_name = "iva2_pwrdm", + .flags = CLKDM_CAN_HWSUP_SWSUP, + .clktrctrl_mask = OMAP3430_CLKTRCTRL_IVA2_MASK, + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP3430), +}; + +static struct clockdomain gfx_3430es1_clkdm = { + .name = "gfx_clkdm", + .pwrdm_name = "gfx_pwrdm", + .flags = CLKDM_CAN_HWSUP_SWSUP, + .clktrctrl_mask = OMAP3430ES1_CLKTRCTRL_GFX_MASK, + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP3430ES1), +}; + +static struct clockdomain sgx_clkdm = { + .name = "sgx_clkdm", + .pwrdm_name = "sgx_pwrdm", + .flags = CLKDM_CAN_HWSUP_SWSUP, + .clktrctrl_mask = OMAP3430ES2_CLKTRCTRL_SGX_MASK, + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP3430ES2), +}; + +static struct clockdomain d2d_clkdm = { + .name = "d2d_clkdm", + .pwrdm_name = "core_pwrdm", + .flags = CLKDM_CAN_HWSUP, + .clktrctrl_mask = OMAP3430ES1_CLKTRCTRL_D2D_MASK, + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP3430ES1), +}; + +static struct clockdomain core_l3_34xx_clkdm = { + .name = "core_l3_clkdm", + .pwrdm_name = "core_pwrdm", + .flags = CLKDM_CAN_HWSUP, + .clktrctrl_mask = OMAP3430_CLKTRCTRL_L3_MASK, + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP3430), +}; + +static struct clockdomain core_l4_34xx_clkdm = { + .name = "core_l4_clkdm", + .pwrdm_name = "core_pwrdm", + .flags = CLKDM_CAN_HWSUP, + .clktrctrl_mask = OMAP3430_CLKTRCTRL_L4_MASK, + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP3430), +}; + +static struct clockdomain dss_34xx_clkdm = { + .name = "dss_clkdm", + .pwrdm_name = "dss_pwrdm", + .flags = CLKDM_CAN_HWSUP_SWSUP, + .clktrctrl_mask = OMAP3430_CLKTRCTRL_DSS_MASK, + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP3430), +}; + +static struct clockdomain cam_clkdm = { + .name = "cam_clkdm", + .pwrdm_name = "cam_pwrdm", + .flags = CLKDM_CAN_HWSUP_SWSUP, + .clktrctrl_mask = OMAP3430_CLKTRCTRL_CAM_MASK, + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP3430), +}; + +static struct clockdomain usbhost_clkdm = { + .name = "usbhost_clkdm", + .pwrdm_name = "usbhost_pwrdm", + .flags = CLKDM_CAN_HWSUP_SWSUP, + .clktrctrl_mask = OMAP3430ES2_CLKTRCTRL_USBHOST_MASK, + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP3430ES2), +}; + +static struct clockdomain per_clkdm = { + .name = "per_clkdm", + .pwrdm_name = "per_pwrdm", + .flags = CLKDM_CAN_HWSUP_SWSUP, + .clktrctrl_mask = OMAP3430_CLKTRCTRL_PER_MASK, + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP3430), +}; + +static struct clockdomain emu_clkdm = { + .name = "emu_clkdm", + .pwrdm_name = "emu_pwrdm", + .flags = CLKDM_CAN_ENABLE_AUTO | CLKDM_CAN_SWSUP, + .clktrctrl_mask = OMAP3430_CLKTRCTRL_EMU_MASK, + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP3430), +}; + +#endif /* CONFIG_ARCH_OMAP34XX */ + +/* + * Clockdomain-powerdomain hwsup dependencies (34XX only) + */ + +static struct clkdm_pwrdm_autodep clkdm_pwrdm_autodeps[] = { + { + .pwrdm_name = "mpu_pwrdm", + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP3430) + }, + { + .pwrdm_name = "iva2_pwrdm", + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP3430) + }, + { NULL } +}; + +/* + * + */ + +static struct clockdomain *clockdomains_omap[] = { + + &wkup_clkdm, + +#ifdef CONFIG_ARCH_OMAP2420 + &mpu_2420_clkdm, + &iva1_2420_clkdm, +#endif + +#ifdef CONFIG_ARCH_OMAP2430 + &mpu_2430_clkdm, + &mdm_clkdm, +#endif + +#ifdef CONFIG_ARCH_OMAP24XX + &dsp_clkdm, + &gfx_24xx_clkdm, + &core_l3_24xx_clkdm, + &core_l4_24xx_clkdm, + &dss_24xx_clkdm, +#endif + +#ifdef CONFIG_ARCH_OMAP34XX + &mpu_34xx_clkdm, + &neon_clkdm, + &iva2_clkdm, + &gfx_3430es1_clkdm, + &sgx_clkdm, + &d2d_clkdm, + &core_l3_34xx_clkdm, + &core_l4_34xx_clkdm, + &dss_34xx_clkdm, + &cam_clkdm, + &usbhost_clkdm, + &per_clkdm, + &emu_clkdm, +#endif + + NULL, +}; + +#endif diff --git a/arch/arm/mach-omap2/cm-regbits-24xx.h b/arch/arm/mach-omap2/cm-regbits-24xx.h index 20ac381..1098ecf 100644 --- a/arch/arm/mach-omap2/cm-regbits-24xx.h +++ b/arch/arm/mach-omap2/cm-regbits-24xx.h @@ -63,7 +63,8 @@ #define OMAP24XX_CLKSEL_MPU_MASK (0x1f << 0) /* CM_CLKSTCTRL_MPU */ -#define OMAP24XX_AUTOSTATE_MPU (1 << 0) +#define OMAP24XX_AUTOSTATE_MPU_SHIFT 0 +#define OMAP24XX_AUTOSTATE_MPU_MASK (1 << 0) /* CM_FCLKEN1_CORE specific bits*/ #define OMAP24XX_EN_TV_SHIFT 2 @@ -238,9 +239,12 @@ #define OMAP24XX_CLKSEL_GPT2_MASK (0x3 << 2) /* CM_CLKSTCTRL_CORE */ -#define OMAP24XX_AUTOSTATE_DSS (1 << 2) -#define OMAP24XX_AUTOSTATE_L4 (1 << 1) -#define OMAP24XX_AUTOSTATE_L3 (1 << 0) +#define OMAP24XX_AUTOSTATE_DSS_SHIFT 2 +#define OMAP24XX_AUTOSTATE_DSS_MASK (1 << 2) +#define OMAP24XX_AUTOSTATE_L4_SHIFT 1 +#define OMAP24XX_AUTOSTATE_L4_MASK (1 << 1) +#define OMAP24XX_AUTOSTATE_L3_SHIFT 0 +#define OMAP24XX_AUTOSTATE_L3_MASK (1 << 0) /* CM_FCLKEN_GFX */ #define OMAP24XX_EN_3D_SHIFT 2 @@ -255,7 +259,8 @@ /* CM_CLKSEL_GFX specific bits */ /* CM_CLKSTCTRL_GFX */ -#define OMAP24XX_AUTOSTATE_GFX (1 << 0) +#define OMAP24XX_AUTOSTATE_GFX_SHIFT 0 +#define OMAP24XX_AUTOSTATE_GFX_MASK (1 << 0) /* CM_FCLKEN_WKUP specific bits */ @@ -367,8 +372,10 @@ #define OMAP24XX_CLKSEL_DSP_MASK (0x1f << 0) /* CM_CLKSTCTRL_DSP */ -#define OMAP2420_AUTOSTATE_IVA (1 << 8) -#define OMAP24XX_AUTOSTATE_DSP (1 << 0) +#define OMAP2420_AUTOSTATE_IVA_SHIFT 8 +#define OMAP2420_AUTOSTATE_IVA_MASK (1 << 8) +#define OMAP24XX_AUTOSTATE_DSP_SHIFT 0 +#define OMAP24XX_AUTOSTATE_DSP_MASK (1 << 0) /* CM_FCLKEN_MDM */ /* 2430 only */ @@ -396,6 +403,7 @@ /* CM_CLKSTCTRL_MDM */ /* 2430 only */ -#define OMAP2430_AUTOSTATE_MDM (1 << 0) +#define OMAP2430_AUTOSTATE_MDM_SHIFT 0 +#define OMAP2430_AUTOSTATE_MDM_MASK (1 << 0) #endif diff --git a/arch/arm/mach-omap2/cm-regbits-34xx.h b/arch/arm/mach-omap2/cm-regbits-34xx.h index ee4c0ca..219f5c8 100644 --- a/arch/arm/mach-omap2/cm-regbits-34xx.h +++ b/arch/arm/mach-omap2/cm-regbits-34xx.h @@ -96,7 +96,8 @@ #define OMAP3430_CLKTRCTRL_IVA2_MASK (0x3 << 0) /* CM_CLKSTST_IVA2 */ -#define OMAP3430_CLKACTIVITY_IVA2 (1 << 0) +#define OMAP3430_CLKACTIVITY_IVA2_SHIFT 0 +#define OMAP3430_CLKACTIVITY_IVA2_MASK (1 << 0) /* CM_REVISION specific bits */ @@ -140,7 +141,8 @@ #define OMAP3430_CLKTRCTRL_MPU_MASK (0x3 << 0) /* CM_CLKSTST_MPU */ -#define OMAP3430_CLKACTIVITY_MPU (1 << 0) +#define OMAP3430_CLKACTIVITY_MPU_SHIFT 0 +#define OMAP3430_CLKACTIVITY_MPU_MASK (1 << 0) /* CM_FCLKEN1_CORE specific bits */ @@ -300,9 +302,12 @@ #define OMAP3430_CLKTRCTRL_L3_MASK (0x3 << 0) /* CM_CLKSTST_CORE */ -#define OMAP3430ES1_CLKACTIVITY_D2D (1 << 2) -#define OMAP3430_CLKACTIVITY_L4 (1 << 1) -#define OMAP3430_CLKACTIVITY_L3 (1 << 0) +#define OMAP3430ES1_CLKACTIVITY_D2D_SHIFT 2 +#define OMAP3430ES1_CLKACTIVITY_D2D_MASK (1 << 2) +#define OMAP3430_CLKACTIVITY_L4_SHIFT 1 +#define OMAP3430_CLKACTIVITY_L4_MASK (1 << 1) +#define OMAP3430_CLKACTIVITY_L3_SHIFT 0 +#define OMAP3430_CLKACTIVITY_L3_MASK (1 << 0) /* CM_FCLKEN_GFX */ #define OMAP3430ES1_EN_3D (1 << 2) @@ -323,7 +328,8 @@ #define OMAP3430ES1_CLKTRCTRL_GFX_MASK (0x3 << 0) /* CM_CLKSTST_GFX */ -#define OMAP3430ES1_CLKACTIVITY_GFX (1 << 0) +#define OMAP3430ES1_CLKACTIVITY_GFX_SHIFT 0 +#define OMAP3430ES1_CLKACTIVITY_GFX_MASK (1 << 0) /* CM_FCLKEN_SGX */ #define OMAP3430ES2_EN_SGX_SHIFT 1 @@ -333,6 +339,14 @@ #define OMAP3430ES2_CLKSEL_SGX_SHIFT 0 #define OMAP3430ES2_CLKSEL_SGX_MASK (0x7 << 0) +/* CM_CLKSTCTRL_SGX */ +#define OMAP3430ES2_CLKTRCTRL_SGX_SHIFT 0 +#define OMAP3430ES2_CLKTRCTRL_SGX_MASK (0x3 << 0) + +/* CM_CLKSTST_SGX */ +#define OMAP3430ES2_CLKACTIVITY_SGX_SHIFT 0 +#define OMAP3430ES2_CLKACTIVITY_SGX_MASK (1 << 0) + /* CM_FCLKEN_WKUP specific bits */ #define OMAP3430ES2_EN_USIMOCP_SHIFT 9 @@ -498,7 +512,8 @@ #define OMAP3430_CLKTRCTRL_DSS_MASK (0x3 << 0) /* CM_CLKSTST_DSS */ -#define OMAP3430_CLKACTIVITY_DSS (1 << 0) +#define OMAP3430_CLKACTIVITY_DSS_SHIFT 0 +#define OMAP3430_CLKACTIVITY_DSS_MASK (1 << 0) /* CM_FCLKEN_CAM specific bits */ @@ -522,7 +537,8 @@ #define OMAP3430_CLKTRCTRL_CAM_MASK (0x3 << 0) /* CM_CLKSTST_CAM */ -#define OMAP3430_CLKACTIVITY_CAM (1 << 0) +#define OMAP3430_CLKACTIVITY_CAM_SHIFT 0 +#define OMAP3430_CLKACTIVITY_CAM_MASK (1 << 0) /* CM_FCLKEN_PER specific bits */ @@ -598,7 +614,8 @@ #define OMAP3430_CLKTRCTRL_PER_MASK (0x3 << 0) /* CM_CLKSTST_PER */ -#define OMAP3430_CLKACTIVITY_PER (1 << 0) +#define OMAP3430_CLKACTIVITY_PER_SHIFT 0 +#define OMAP3430_CLKACTIVITY_PER_MASK (1 << 0) /* CM_CLKSEL1_EMU */ #define OMAP3430_DIV_DPLL4_SHIFT 24 @@ -623,7 +640,8 @@ #define OMAP3430_CLKTRCTRL_EMU_MASK (0x3 << 0) /* CM_CLKSTST_EMU */ -#define OMAP3430_CLKACTIVITY_EMU (1 << 0) +#define OMAP3430_CLKACTIVITY_EMU_SHIFT 0 +#define OMAP3430_CLKACTIVITY_EMU_MASK (1 << 0) /* CM_CLKSEL2_EMU specific bits */ #define OMAP3430_CORE_DPLL_EMU_MULT_SHIFT 8 @@ -673,6 +691,8 @@ #define OMAP3430ES2_CLKTRCTRL_USBHOST_SHIFT 0 #define OMAP3430ES2_CLKTRCTRL_USBHOST_MASK (3 << 0) - +/* CM_CLKSTST_USBHOST */ +#define OMAP3430ES2_CLKACTIVITY_USBHOST_SHIFT 0 +#define OMAP3430ES2_CLKACTIVITY_USBHOST_MASK (1 << 0) #endif diff --git a/arch/arm/mach-omap2/io.c b/arch/arm/mach-omap2/io.c index 5f73cf0..371e540 100644 --- a/arch/arm/mach-omap2/io.c +++ b/arch/arm/mach-omap2/io.c @@ -28,6 +28,9 @@ #include "powerdomains.h" +#include +#include "clockdomains.h" + extern void omap_sram_init(void); extern int omap2_clk_init(void); extern void omap2_check_revision(void); @@ -106,6 +109,7 @@ void __init omap2_init_common_hw(void) { omap2_mux_init(); pwrdm_init(powerdomains_omap); + clkdm_init(clockdomains_omap, clkdm_pwrdm_autodeps); omap2_clk_init(); /* * Need to Fix this for 2430 -- cgit v0.10.2 From d1b03f615ae7ede957551dd26bf8929bdc2bb786 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Tue, 19 Aug 2008 11:08:44 +0300 Subject: ARM: OMAP2: Clockdomain: Associate clocks with clockdomains Associate each OMAP24xx clock in arch/arm/mach-omap2/clock24xx.h with a clockdomain. Also move the L4 clock up higher in the file in preparation to define the SSI L4 iclk. Signed-off-by: Paul Walmsley Signed-off-by: Tony Lindgren diff --git a/arch/arm/mach-omap2/clock24xx.h b/arch/arm/mach-omap2/clock24xx.h index be4e255..242a19d 100644 --- a/arch/arm/mach-omap2/clock24xx.h +++ b/arch/arm/mach-omap2/clock24xx.h @@ -626,6 +626,7 @@ static struct clk func_32k_ck = { .rate = 32000, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X | RATE_FIXED | ALWAYS_ENABLED | RATE_PROPAGATES, + .clkdm_name = "wkup_clkdm", .recalc = &propagate_rate, }; @@ -634,17 +635,19 @@ static struct clk osc_ck = { /* (*12, *13, 19.2, *26, 38.4)MHz */ .name = "osc_ck", .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X | RATE_PROPAGATES, + .clkdm_name = "wkup_clkdm", .enable = &omap2_enable_osc_ck, .disable = &omap2_disable_osc_ck, .recalc = &omap2_osc_clk_recalc, }; -/* With out modem likely 12MHz, with modem likely 13MHz */ +/* Without modem likely 12MHz, with modem likely 13MHz */ static struct clk sys_ck = { /* (*12, *13, 19.2, 26, 38.4)MHz */ .name = "sys_ck", /* ~ ref_clk also */ .parent = &osc_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X | ALWAYS_ENABLED | RATE_PROPAGATES, + .clkdm_name = "wkup_clkdm", .recalc = &omap2_sys_clk_recalc, }; @@ -653,6 +656,7 @@ static struct clk alt_ck = { /* Typical 54M or 48M, may not exist */ .rate = 54000000, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X | RATE_FIXED | ALWAYS_ENABLED | RATE_PROPAGATES, + .clkdm_name = "wkup_clkdm", .recalc = &propagate_rate, }; @@ -684,6 +688,7 @@ static struct clk dpll_ck = { .dpll_data = &dpll_dd, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X | RATE_PROPAGATES | ALWAYS_ENABLED, + .clkdm_name = "wkup_clkdm", .recalc = &omap2_dpllcore_recalc, .set_rate = &omap2_reprogram_dpllcore, }; @@ -694,6 +699,7 @@ static struct clk apll96_ck = { .rate = 96000000, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X | RATE_FIXED | RATE_PROPAGATES | ENABLE_ON_INIT, + .clkdm_name = "wkup_clkdm", .enable_reg = OMAP_CM_REGADDR(PLL_MOD, CM_CLKEN), .enable_bit = OMAP24XX_EN_96M_PLL_SHIFT, .enable = &omap2_clk_fixed_enable, @@ -707,6 +713,7 @@ static struct clk apll54_ck = { .rate = 54000000, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X | RATE_FIXED | RATE_PROPAGATES | ENABLE_ON_INIT, + .clkdm_name = "wkup_clkdm", .enable_reg = OMAP_CM_REGADDR(PLL_MOD, CM_CLKEN), .enable_bit = OMAP24XX_EN_54M_PLL_SHIFT, .enable = &omap2_clk_fixed_enable, @@ -741,6 +748,7 @@ static struct clk func_54m_ck = { .parent = &apll54_ck, /* can also be alt_clk */ .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X | RATE_PROPAGATES | PARENT_CONTROLS_CLOCK, + .clkdm_name = "wkup_clkdm", .init = &omap2_init_clksel_parent, .clksel_reg = OMAP_CM_REGADDR(PLL_MOD, CM_CLKSEL1), .clksel_mask = OMAP24XX_54M_SOURCE, @@ -753,6 +761,7 @@ static struct clk core_ck = { .parent = &dpll_ck, /* can also be 32k */ .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X | ALWAYS_ENABLED | RATE_PROPAGATES, + .clkdm_name = "wkup_clkdm", .recalc = &followparent_recalc, }; @@ -779,6 +788,7 @@ static struct clk func_96m_ck = { .parent = &apll96_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X | RATE_PROPAGATES | PARENT_CONTROLS_CLOCK, + .clkdm_name = "wkup_clkdm", .init = &omap2_init_clksel_parent, .clksel_reg = OMAP_CM_REGADDR(PLL_MOD, CM_CLKSEL1), .clksel_mask = OMAP2430_96M_SOURCE, @@ -811,6 +821,7 @@ static struct clk func_48m_ck = { .parent = &apll96_ck, /* 96M or Alt */ .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X | RATE_PROPAGATES | PARENT_CONTROLS_CLOCK, + .clkdm_name = "wkup_clkdm", .init = &omap2_init_clksel_parent, .clksel_reg = OMAP_CM_REGADDR(PLL_MOD, CM_CLKSEL1), .clksel_mask = OMAP24XX_48M_SOURCE, @@ -826,6 +837,7 @@ static struct clk func_12m_ck = { .fixed_div = 4, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X | RATE_PROPAGATES | PARENT_CONTROLS_CLOCK, + .clkdm_name = "wkup_clkdm", .recalc = &omap2_fixed_divisor_recalc, }; @@ -878,6 +890,7 @@ static struct clk sys_clkout_src = { .parent = &func_54m_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X | RATE_PROPAGATES, + .clkdm_name = "wkup_clkdm", .enable_reg = OMAP24XX_PRCM_CLKOUT_CTRL, .enable_bit = OMAP24XX_CLKOUT_EN_SHIFT, .init = &omap2_init_clksel_parent, @@ -908,6 +921,7 @@ static struct clk sys_clkout = { .parent = &sys_clkout_src, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X | PARENT_CONTROLS_CLOCK, + .clkdm_name = "wkup_clkdm", .clksel_reg = OMAP24XX_PRCM_CLKOUT_CTRL, .clksel_mask = OMAP24XX_CLKOUT_DIV_MASK, .clksel = sys_clkout_clksel, @@ -921,6 +935,7 @@ static struct clk sys_clkout2_src = { .name = "sys_clkout2_src", .parent = &func_54m_ck, .flags = CLOCK_IN_OMAP242X | RATE_PROPAGATES, + .clkdm_name = "wkup_clkdm", .enable_reg = OMAP24XX_PRCM_CLKOUT_CTRL, .enable_bit = OMAP2420_CLKOUT2_EN_SHIFT, .init = &omap2_init_clksel_parent, @@ -942,6 +957,7 @@ static struct clk sys_clkout2 = { .name = "sys_clkout2", .parent = &sys_clkout2_src, .flags = CLOCK_IN_OMAP242X | PARENT_CONTROLS_CLOCK, + .clkdm_name = "wkup_clkdm", .clksel_reg = OMAP24XX_PRCM_CLKOUT_CTRL, .clksel_mask = OMAP2420_CLKOUT2_DIV_MASK, .clksel = sys_clkout2_clksel, @@ -954,6 +970,7 @@ static struct clk emul_ck = { .name = "emul_ck", .parent = &func_54m_ck, .flags = CLOCK_IN_OMAP242X, + .clkdm_name = "wkup_clkdm", .enable_reg = OMAP24XX_PRCM_CLKEMUL_CTRL, .enable_bit = OMAP24XX_EMULATION_EN_SHIFT, .recalc = &followparent_recalc, @@ -990,12 +1007,13 @@ static struct clk mpu_ck = { /* Control cpu */ .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X | ALWAYS_ENABLED | DELAYED_APP | CONFIG_PARTICIPANT | RATE_PROPAGATES, + .clkdm_name = "mpu_clkdm", .init = &omap2_init_clksel_parent, .clksel_reg = OMAP_CM_REGADDR(MPU_MOD, CM_CLKSEL), .clksel_mask = OMAP24XX_CLKSEL_MPU_MASK, .clksel = mpu_clksel, .recalc = &omap2_clksel_recalc, - .round_rate = &omap2_clksel_round_rate, + .round_rate = &omap2_clksel_round_rate, .set_rate = &omap2_clksel_set_rate }; @@ -1031,6 +1049,7 @@ static struct clk dsp_fck = { .parent = &core_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X | DELAYED_APP | CONFIG_PARTICIPANT | RATE_PROPAGATES, + .clkdm_name = "dsp_clkdm", .enable_reg = OMAP_CM_REGADDR(OMAP24XX_DSP_MOD, CM_FCLKEN), .enable_bit = OMAP24XX_CM_FCLKEN_DSP_EN_DSP_SHIFT, .clksel_reg = OMAP_CM_REGADDR(OMAP24XX_DSP_MOD, CM_CLKSEL), @@ -1054,10 +1073,7 @@ static const struct clksel dsp_irate_ick_clksel[] = { { .parent = NULL } }; -/* - * This clock does not exist as such in the TRM, but is added to - * separate source selection from XXX - */ +/* This clock does not exist as such in the TRM. */ static struct clk dsp_irate_ick = { .name = "dsp_irate_ick", .parent = &dsp_fck, @@ -1089,11 +1105,17 @@ static struct clk iva2_1_ick = { .enable_bit = OMAP24XX_CM_FCLKEN_DSP_EN_DSP_SHIFT, }; +/* + * The IVA1 is an ARM7 core on the 2420 that has nothing to do with + * the C54x, but which is contained in the DSP powerdomain. Does not + * exist on later OMAPs. + */ static struct clk iva1_ifck = { .name = "iva1_ifck", .parent = &core_ck, .flags = CLOCK_IN_OMAP242X | CONFIG_PARTICIPANT | RATE_PROPAGATES | DELAYED_APP, + .clkdm_name = "iva1_clkdm", .enable_reg = OMAP_CM_REGADDR(OMAP24XX_DSP_MOD, CM_FCLKEN), .enable_bit = OMAP2420_EN_IVA_COP_SHIFT, .clksel_reg = OMAP_CM_REGADDR(OMAP24XX_DSP_MOD, CM_CLKSEL), @@ -1109,6 +1131,7 @@ static struct clk iva1_mpu_int_ifck = { .name = "iva1_mpu_int_ifck", .parent = &iva1_ifck, .flags = CLOCK_IN_OMAP242X, + .clkdm_name = "iva1_clkdm", .enable_reg = OMAP_CM_REGADDR(OMAP24XX_DSP_MOD, CM_FCLKEN), .enable_bit = OMAP2420_EN_IVA_MPU_SHIFT, .fixed_div = 2, @@ -1156,6 +1179,7 @@ static struct clk core_l3_ck = { /* Used for ick and fck, interconnect */ .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X | ALWAYS_ENABLED | DELAYED_APP | CONFIG_PARTICIPANT | RATE_PROPAGATES, + .clkdm_name = "core_l3_clkdm", .clksel_reg = OMAP_CM_REGADDR(CORE_MOD, CM_CLKSEL1), .clksel_mask = OMAP24XX_CLKSEL_L3_MASK, .clksel = core_l3_clksel, @@ -1177,11 +1201,13 @@ static const struct clksel usb_l4_ick_clksel[] = { { .parent = NULL }, }; +/* It is unclear from TRM whether usb_l4_ick is really in L3 or L4 clkdm */ static struct clk usb_l4_ick = { /* FS-USB interface clock */ .name = "usb_l4_ick", .parent = &core_l3_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X | DELAYED_APP | CONFIG_PARTICIPANT, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN2), .enable_bit = OMAP24XX_EN_USB_SHIFT, .clksel_reg = OMAP_CM_REGADDR(CORE_MOD, CM_CLKSEL1), @@ -1193,10 +1219,42 @@ static struct clk usb_l4_ick = { /* FS-USB interface clock */ }; /* + * L4 clock management domain + * + * This domain contains lots of interface clocks from the L4 interface, some + * functional clocks. Fixed APLL functional source clocks are managed in + * this domain. + */ +static const struct clksel_rate l4_core_l3_rates[] = { + { .div = 1, .val = 1, .flags = RATE_IN_24XX | DEFAULT_RATE }, + { .div = 2, .val = 2, .flags = RATE_IN_24XX }, + { .div = 0 } +}; + +static const struct clksel l4_clksel[] = { + { .parent = &core_l3_ck, .rates = l4_core_l3_rates }, + { .parent = NULL } +}; + +static struct clk l4_ck = { /* used both as an ick and fck */ + .name = "l4_ck", + .parent = &core_l3_ck, + .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X | + ALWAYS_ENABLED | DELAYED_APP | RATE_PROPAGATES, + .clkdm_name = "core_l4_clkdm", + .clksel_reg = OMAP_CM_REGADDR(CORE_MOD, CM_CLKSEL1), + .clksel_mask = OMAP24XX_CLKSEL_L4_MASK, + .clksel = l4_clksel, + .recalc = &omap2_clksel_recalc, + .round_rate = &omap2_clksel_round_rate, + .set_rate = &omap2_clksel_set_rate +}; + +/* * SSI is in L3 management domain, its direct parent is core not l3, * many core power domain entities are grouped into the L3 clock * domain. - * SSI_SSR_FCLK, SSI_SST_FCLK, SSI_L4_CLIK + * SSI_SSR_FCLK, SSI_SST_FCLK, SSI_L4_ICLK * * ssr = core/1/2/3/4/5, sst = 1/2 ssr. */ @@ -1221,6 +1279,7 @@ static struct clk ssi_ssr_sst_fck = { .parent = &core_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X | DELAYED_APP, + .clkdm_name = "core_l3_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, OMAP24XX_CM_FCLKEN2), .enable_bit = OMAP24XX_EN_SSI_SHIFT, .clksel_reg = OMAP_CM_REGADDR(CORE_MOD, CM_CLKSEL1), @@ -1231,6 +1290,7 @@ static struct clk ssi_ssr_sst_fck = { .set_rate = &omap2_clksel_set_rate }; + /* * GFX clock domain * Clocks: @@ -1254,6 +1314,7 @@ static struct clk gfx_3d_fck = { .name = "gfx_3d_fck", .parent = &core_l3_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "gfx_clkdm", .enable_reg = OMAP_CM_REGADDR(GFX_MOD, CM_FCLKEN), .enable_bit = OMAP24XX_EN_3D_SHIFT, .clksel_reg = OMAP_CM_REGADDR(GFX_MOD, CM_CLKSEL), @@ -1268,6 +1329,7 @@ static struct clk gfx_2d_fck = { .name = "gfx_2d_fck", .parent = &core_l3_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "gfx_clkdm", .enable_reg = OMAP_CM_REGADDR(GFX_MOD, CM_FCLKEN), .enable_bit = OMAP24XX_EN_2D_SHIFT, .clksel_reg = OMAP_CM_REGADDR(GFX_MOD, CM_CLKSEL), @@ -1282,6 +1344,7 @@ static struct clk gfx_ick = { .name = "gfx_ick", /* From l3 */ .parent = &core_l3_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "gfx_clkdm", .enable_reg = OMAP_CM_REGADDR(GFX_MOD, CM_ICLKEN), .enable_bit = OMAP_EN_GFX_SHIFT, .recalc = &followparent_recalc, @@ -1311,6 +1374,7 @@ static struct clk mdm_ick = { /* used both as a ick and fck */ .name = "mdm_ick", .parent = &core_ck, .flags = CLOCK_IN_OMAP243X | DELAYED_APP | CONFIG_PARTICIPANT, + .clkdm_name = "mdm_clkdm", .enable_reg = OMAP_CM_REGADDR(OMAP2430_MDM_MOD, CM_ICLKEN), .enable_bit = OMAP2430_CM_ICLKEN_MDM_EN_MDM_SHIFT, .clksel_reg = OMAP_CM_REGADDR(OMAP2430_MDM_MOD, CM_CLKSEL), @@ -1325,52 +1389,13 @@ static struct clk mdm_osc_ck = { .name = "mdm_osc_ck", .parent = &osc_ck, .flags = CLOCK_IN_OMAP243X, + .clkdm_name = "mdm_clkdm", .enable_reg = OMAP_CM_REGADDR(OMAP2430_MDM_MOD, CM_FCLKEN), .enable_bit = OMAP2430_EN_OSC_SHIFT, .recalc = &followparent_recalc, }; /* - * L4 clock management domain - * - * This domain contains lots of interface clocks from the L4 interface, some - * functional clocks. Fixed APLL functional source clocks are managed in - * this domain. - */ -static const struct clksel_rate l4_core_l3_rates[] = { - { .div = 1, .val = 1, .flags = RATE_IN_24XX | DEFAULT_RATE }, - { .div = 2, .val = 2, .flags = RATE_IN_24XX }, - { .div = 0 } -}; - -static const struct clksel l4_clksel[] = { - { .parent = &core_l3_ck, .rates = l4_core_l3_rates }, - { .parent = NULL } -}; - -static struct clk l4_ck = { /* used both as an ick and fck */ - .name = "l4_ck", - .parent = &core_l3_ck, - .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X | - ALWAYS_ENABLED | DELAYED_APP | RATE_PROPAGATES, - .clksel_reg = OMAP_CM_REGADDR(CORE_MOD, CM_CLKSEL1), - .clksel_mask = OMAP24XX_CLKSEL_L4_MASK, - .clksel = l4_clksel, - .recalc = &omap2_clksel_recalc, - .round_rate = &omap2_clksel_round_rate, - .set_rate = &omap2_clksel_set_rate -}; - -static struct clk ssi_l4_ick = { - .name = "ssi_l4_ick", - .parent = &l4_ck, - .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, - .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN2), - .enable_bit = OMAP24XX_EN_SSI_SHIFT, - .recalc = &followparent_recalc, -}; - -/* * DSS clock domain * CLOCKs: * DSS_L4_ICLK, DSS_L3_ICLK, @@ -1409,6 +1434,7 @@ static struct clk dss_ick = { /* Enables both L3,L4 ICLK's */ .name = "dss_ick", .parent = &l4_ck, /* really both l3 and l4 */ .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "dss_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1), .enable_bit = OMAP24XX_EN_DSS1_SHIFT, .recalc = &followparent_recalc, @@ -1419,6 +1445,7 @@ static struct clk dss1_fck = { .parent = &core_ck, /* Core or sys */ .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X | DELAYED_APP, + .clkdm_name = "dss_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_FCLKEN1), .enable_bit = OMAP24XX_EN_DSS1_SHIFT, .init = &omap2_init_clksel_parent, @@ -1451,6 +1478,7 @@ static struct clk dss2_fck = { /* Alt clk used in power management */ .parent = &sys_ck, /* fixed at sys_ck or 48MHz */ .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X | DELAYED_APP, + .clkdm_name = "dss_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_FCLKEN1), .enable_bit = OMAP24XX_EN_DSS2_SHIFT, .init = &omap2_init_clksel_parent, @@ -1464,6 +1492,7 @@ static struct clk dss_54m_fck = { /* Alt clk used in power management */ .name = "dss_54m_fck", /* 54m tv clk */ .parent = &func_54m_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "dss_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_FCLKEN1), .enable_bit = OMAP24XX_EN_TV_SHIFT, .recalc = &followparent_recalc, @@ -1491,6 +1520,7 @@ static struct clk gpt1_ick = { .name = "gpt1_ick", .parent = &l4_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(WKUP_MOD, CM_ICLKEN), .enable_bit = OMAP24XX_EN_GPT1_SHIFT, .recalc = &followparent_recalc, @@ -1500,6 +1530,7 @@ static struct clk gpt1_fck = { .name = "gpt1_fck", .parent = &func_32k_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(WKUP_MOD, CM_FCLKEN), .enable_bit = OMAP24XX_EN_GPT1_SHIFT, .init = &omap2_init_clksel_parent, @@ -1515,6 +1546,7 @@ static struct clk gpt2_ick = { .name = "gpt2_ick", .parent = &l4_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1), .enable_bit = OMAP24XX_EN_GPT2_SHIFT, .recalc = &followparent_recalc, @@ -1524,6 +1556,7 @@ static struct clk gpt2_fck = { .name = "gpt2_fck", .parent = &func_32k_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_FCLKEN1), .enable_bit = OMAP24XX_EN_GPT2_SHIFT, .init = &omap2_init_clksel_parent, @@ -1537,6 +1570,7 @@ static struct clk gpt3_ick = { .name = "gpt3_ick", .parent = &l4_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1), .enable_bit = OMAP24XX_EN_GPT3_SHIFT, .recalc = &followparent_recalc, @@ -1546,6 +1580,7 @@ static struct clk gpt3_fck = { .name = "gpt3_fck", .parent = &func_32k_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_FCLKEN1), .enable_bit = OMAP24XX_EN_GPT3_SHIFT, .init = &omap2_init_clksel_parent, @@ -1559,6 +1594,7 @@ static struct clk gpt4_ick = { .name = "gpt4_ick", .parent = &l4_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1), .enable_bit = OMAP24XX_EN_GPT4_SHIFT, .recalc = &followparent_recalc, @@ -1568,6 +1604,7 @@ static struct clk gpt4_fck = { .name = "gpt4_fck", .parent = &func_32k_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_FCLKEN1), .enable_bit = OMAP24XX_EN_GPT4_SHIFT, .init = &omap2_init_clksel_parent, @@ -1581,6 +1618,7 @@ static struct clk gpt5_ick = { .name = "gpt5_ick", .parent = &l4_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1), .enable_bit = OMAP24XX_EN_GPT5_SHIFT, .recalc = &followparent_recalc, @@ -1590,6 +1628,7 @@ static struct clk gpt5_fck = { .name = "gpt5_fck", .parent = &func_32k_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_FCLKEN1), .enable_bit = OMAP24XX_EN_GPT5_SHIFT, .init = &omap2_init_clksel_parent, @@ -1603,6 +1642,7 @@ static struct clk gpt6_ick = { .name = "gpt6_ick", .parent = &l4_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1), .enable_bit = OMAP24XX_EN_GPT6_SHIFT, .recalc = &followparent_recalc, @@ -1612,6 +1652,7 @@ static struct clk gpt6_fck = { .name = "gpt6_fck", .parent = &func_32k_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_FCLKEN1), .enable_bit = OMAP24XX_EN_GPT6_SHIFT, .init = &omap2_init_clksel_parent, @@ -1634,6 +1675,7 @@ static struct clk gpt7_fck = { .name = "gpt7_fck", .parent = &func_32k_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_FCLKEN1), .enable_bit = OMAP24XX_EN_GPT7_SHIFT, .init = &omap2_init_clksel_parent, @@ -1647,6 +1689,7 @@ static struct clk gpt8_ick = { .name = "gpt8_ick", .parent = &l4_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1), .enable_bit = OMAP24XX_EN_GPT8_SHIFT, .recalc = &followparent_recalc, @@ -1656,6 +1699,7 @@ static struct clk gpt8_fck = { .name = "gpt8_fck", .parent = &func_32k_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_FCLKEN1), .enable_bit = OMAP24XX_EN_GPT8_SHIFT, .init = &omap2_init_clksel_parent, @@ -1669,6 +1713,7 @@ static struct clk gpt9_ick = { .name = "gpt9_ick", .parent = &l4_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1), .enable_bit = OMAP24XX_EN_GPT9_SHIFT, .recalc = &followparent_recalc, @@ -1678,6 +1723,7 @@ static struct clk gpt9_fck = { .name = "gpt9_fck", .parent = &func_32k_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_FCLKEN1), .enable_bit = OMAP24XX_EN_GPT9_SHIFT, .init = &omap2_init_clksel_parent, @@ -1691,6 +1737,7 @@ static struct clk gpt10_ick = { .name = "gpt10_ick", .parent = &l4_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1), .enable_bit = OMAP24XX_EN_GPT10_SHIFT, .recalc = &followparent_recalc, @@ -1700,6 +1747,7 @@ static struct clk gpt10_fck = { .name = "gpt10_fck", .parent = &func_32k_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_FCLKEN1), .enable_bit = OMAP24XX_EN_GPT10_SHIFT, .init = &omap2_init_clksel_parent, @@ -1713,6 +1761,7 @@ static struct clk gpt11_ick = { .name = "gpt11_ick", .parent = &l4_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1), .enable_bit = OMAP24XX_EN_GPT11_SHIFT, .recalc = &followparent_recalc, @@ -1722,6 +1771,7 @@ static struct clk gpt11_fck = { .name = "gpt11_fck", .parent = &func_32k_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_FCLKEN1), .enable_bit = OMAP24XX_EN_GPT11_SHIFT, .init = &omap2_init_clksel_parent, @@ -1735,6 +1785,7 @@ static struct clk gpt12_ick = { .name = "gpt12_ick", .parent = &l4_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1), .enable_bit = OMAP24XX_EN_GPT12_SHIFT, .recalc = &followparent_recalc, @@ -1744,6 +1795,7 @@ static struct clk gpt12_fck = { .name = "gpt12_fck", .parent = &func_32k_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_FCLKEN1), .enable_bit = OMAP24XX_EN_GPT12_SHIFT, .init = &omap2_init_clksel_parent, @@ -1758,6 +1810,7 @@ static struct clk mcbsp1_ick = { .id = 1, .parent = &l4_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1), .enable_bit = OMAP24XX_EN_MCBSP1_SHIFT, .recalc = &followparent_recalc, @@ -1768,6 +1821,7 @@ static struct clk mcbsp1_fck = { .id = 1, .parent = &func_96m_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_FCLKEN1), .enable_bit = OMAP24XX_EN_MCBSP1_SHIFT, .recalc = &followparent_recalc, @@ -1778,6 +1832,7 @@ static struct clk mcbsp2_ick = { .id = 2, .parent = &l4_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1), .enable_bit = OMAP24XX_EN_MCBSP2_SHIFT, .recalc = &followparent_recalc, @@ -1788,6 +1843,7 @@ static struct clk mcbsp2_fck = { .id = 2, .parent = &func_96m_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_FCLKEN1), .enable_bit = OMAP24XX_EN_MCBSP2_SHIFT, .recalc = &followparent_recalc, @@ -1798,6 +1854,7 @@ static struct clk mcbsp3_ick = { .id = 3, .parent = &l4_ck, .flags = CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN2), .enable_bit = OMAP2430_EN_MCBSP3_SHIFT, .recalc = &followparent_recalc, @@ -1808,6 +1865,7 @@ static struct clk mcbsp3_fck = { .id = 3, .parent = &func_96m_ck, .flags = CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, OMAP24XX_CM_FCLKEN2), .enable_bit = OMAP2430_EN_MCBSP3_SHIFT, .recalc = &followparent_recalc, @@ -1818,6 +1876,7 @@ static struct clk mcbsp4_ick = { .id = 4, .parent = &l4_ck, .flags = CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN2), .enable_bit = OMAP2430_EN_MCBSP4_SHIFT, .recalc = &followparent_recalc, @@ -1828,6 +1887,7 @@ static struct clk mcbsp4_fck = { .id = 4, .parent = &func_96m_ck, .flags = CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, OMAP24XX_CM_FCLKEN2), .enable_bit = OMAP2430_EN_MCBSP4_SHIFT, .recalc = &followparent_recalc, @@ -1838,6 +1898,7 @@ static struct clk mcbsp5_ick = { .id = 5, .parent = &l4_ck, .flags = CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN2), .enable_bit = OMAP2430_EN_MCBSP5_SHIFT, .recalc = &followparent_recalc, @@ -1848,6 +1909,7 @@ static struct clk mcbsp5_fck = { .id = 5, .parent = &func_96m_ck, .flags = CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, OMAP24XX_CM_FCLKEN2), .enable_bit = OMAP2430_EN_MCBSP5_SHIFT, .recalc = &followparent_recalc, @@ -1857,6 +1919,7 @@ static struct clk mcspi1_ick = { .name = "mcspi_ick", .id = 1, .parent = &l4_ck, + .clkdm_name = "core_l4_clkdm", .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1), .enable_bit = OMAP24XX_EN_MCSPI1_SHIFT, @@ -1868,6 +1931,7 @@ static struct clk mcspi1_fck = { .id = 1, .parent = &func_48m_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_FCLKEN1), .enable_bit = OMAP24XX_EN_MCSPI1_SHIFT, .recalc = &followparent_recalc, @@ -1878,6 +1942,7 @@ static struct clk mcspi2_ick = { .id = 2, .parent = &l4_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1), .enable_bit = OMAP24XX_EN_MCSPI2_SHIFT, .recalc = &followparent_recalc, @@ -1888,6 +1953,7 @@ static struct clk mcspi2_fck = { .id = 2, .parent = &func_48m_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_FCLKEN1), .enable_bit = OMAP24XX_EN_MCSPI2_SHIFT, .recalc = &followparent_recalc, @@ -1898,6 +1964,7 @@ static struct clk mcspi3_ick = { .id = 3, .parent = &l4_ck, .flags = CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN2), .enable_bit = OMAP2430_EN_MCSPI3_SHIFT, .recalc = &followparent_recalc, @@ -1908,6 +1975,7 @@ static struct clk mcspi3_fck = { .id = 3, .parent = &func_48m_ck, .flags = CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, OMAP24XX_CM_FCLKEN2), .enable_bit = OMAP2430_EN_MCSPI3_SHIFT, .recalc = &followparent_recalc, @@ -1917,6 +1985,7 @@ static struct clk uart1_ick = { .name = "uart1_ick", .parent = &l4_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1), .enable_bit = OMAP24XX_EN_UART1_SHIFT, .recalc = &followparent_recalc, @@ -1926,6 +1995,7 @@ static struct clk uart1_fck = { .name = "uart1_fck", .parent = &func_48m_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_FCLKEN1), .enable_bit = OMAP24XX_EN_UART1_SHIFT, .recalc = &followparent_recalc, @@ -1935,6 +2005,7 @@ static struct clk uart2_ick = { .name = "uart2_ick", .parent = &l4_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1), .enable_bit = OMAP24XX_EN_UART2_SHIFT, .recalc = &followparent_recalc, @@ -1944,6 +2015,7 @@ static struct clk uart2_fck = { .name = "uart2_fck", .parent = &func_48m_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_FCLKEN1), .enable_bit = OMAP24XX_EN_UART2_SHIFT, .recalc = &followparent_recalc, @@ -1953,6 +2025,7 @@ static struct clk uart3_ick = { .name = "uart3_ick", .parent = &l4_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN2), .enable_bit = OMAP24XX_EN_UART3_SHIFT, .recalc = &followparent_recalc, @@ -1962,6 +2035,7 @@ static struct clk uart3_fck = { .name = "uart3_fck", .parent = &func_48m_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, OMAP24XX_CM_FCLKEN2), .enable_bit = OMAP24XX_EN_UART3_SHIFT, .recalc = &followparent_recalc, @@ -1971,6 +2045,7 @@ static struct clk gpios_ick = { .name = "gpios_ick", .parent = &l4_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(WKUP_MOD, CM_ICLKEN), .enable_bit = OMAP24XX_EN_GPIOS_SHIFT, .recalc = &followparent_recalc, @@ -1980,6 +2055,7 @@ static struct clk gpios_fck = { .name = "gpios_fck", .parent = &func_32k_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "wkup_clkdm", .enable_reg = OMAP_CM_REGADDR(WKUP_MOD, CM_FCLKEN), .enable_bit = OMAP24XX_EN_GPIOS_SHIFT, .recalc = &followparent_recalc, @@ -1989,6 +2065,7 @@ static struct clk mpu_wdt_ick = { .name = "mpu_wdt_ick", .parent = &l4_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(WKUP_MOD, CM_ICLKEN), .enable_bit = OMAP24XX_EN_MPU_WDT_SHIFT, .recalc = &followparent_recalc, @@ -1998,6 +2075,7 @@ static struct clk mpu_wdt_fck = { .name = "mpu_wdt_fck", .parent = &func_32k_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "wkup_clkdm", .enable_reg = OMAP_CM_REGADDR(WKUP_MOD, CM_FCLKEN), .enable_bit = OMAP24XX_EN_MPU_WDT_SHIFT, .recalc = &followparent_recalc, @@ -2006,31 +2084,40 @@ static struct clk mpu_wdt_fck = { static struct clk sync_32k_ick = { .name = "sync_32k_ick", .parent = &l4_ck, - .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X | ENABLE_ON_INIT, + .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X | + ENABLE_ON_INIT, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(WKUP_MOD, CM_ICLKEN), .enable_bit = OMAP24XX_EN_32KSYNC_SHIFT, .recalc = &followparent_recalc, }; + static struct clk wdt1_ick = { .name = "wdt1_ick", .parent = &l4_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(WKUP_MOD, CM_ICLKEN), .enable_bit = OMAP24XX_EN_WDT1_SHIFT, .recalc = &followparent_recalc, }; + static struct clk omapctrl_ick = { .name = "omapctrl_ick", .parent = &l4_ck, - .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X | ENABLE_ON_INIT, + .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X | + ENABLE_ON_INIT, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(WKUP_MOD, CM_ICLKEN), .enable_bit = OMAP24XX_EN_OMAPCTRL_SHIFT, .recalc = &followparent_recalc, }; + static struct clk icr_ick = { .name = "icr_ick", .parent = &l4_ck, .flags = CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(WKUP_MOD, CM_ICLKEN), .enable_bit = OMAP2430_EN_ICR_SHIFT, .recalc = &followparent_recalc, @@ -2040,15 +2127,22 @@ static struct clk cam_ick = { .name = "cam_ick", .parent = &l4_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1), .enable_bit = OMAP24XX_EN_CAM_SHIFT, .recalc = &followparent_recalc, }; +/* + * cam_fck controls both CAM_MCLK and CAM_FCLK. It should probably be + * split into two separate clocks, since the parent clocks are different + * and the clockdomains are also different. + */ static struct clk cam_fck = { .name = "cam_fck", .parent = &func_96m_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "core_l3_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_FCLKEN1), .enable_bit = OMAP24XX_EN_CAM_SHIFT, .recalc = &followparent_recalc, @@ -2058,6 +2152,7 @@ static struct clk mailboxes_ick = { .name = "mailboxes_ick", .parent = &l4_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1), .enable_bit = OMAP24XX_EN_MAILBOXES_SHIFT, .recalc = &followparent_recalc, @@ -2067,6 +2162,7 @@ static struct clk wdt4_ick = { .name = "wdt4_ick", .parent = &l4_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1), .enable_bit = OMAP24XX_EN_WDT4_SHIFT, .recalc = &followparent_recalc, @@ -2076,6 +2172,7 @@ static struct clk wdt4_fck = { .name = "wdt4_fck", .parent = &func_32k_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_FCLKEN1), .enable_bit = OMAP24XX_EN_WDT4_SHIFT, .recalc = &followparent_recalc, @@ -2085,6 +2182,7 @@ static struct clk wdt3_ick = { .name = "wdt3_ick", .parent = &l4_ck, .flags = CLOCK_IN_OMAP242X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1), .enable_bit = OMAP2420_EN_WDT3_SHIFT, .recalc = &followparent_recalc, @@ -2094,6 +2192,7 @@ static struct clk wdt3_fck = { .name = "wdt3_fck", .parent = &func_32k_ck, .flags = CLOCK_IN_OMAP242X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_FCLKEN1), .enable_bit = OMAP2420_EN_WDT3_SHIFT, .recalc = &followparent_recalc, @@ -2103,6 +2202,7 @@ static struct clk mspro_ick = { .name = "mspro_ick", .parent = &l4_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1), .enable_bit = OMAP24XX_EN_MSPRO_SHIFT, .recalc = &followparent_recalc, @@ -2112,6 +2212,7 @@ static struct clk mspro_fck = { .name = "mspro_fck", .parent = &func_96m_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_FCLKEN1), .enable_bit = OMAP24XX_EN_MSPRO_SHIFT, .recalc = &followparent_recalc, @@ -2121,6 +2222,7 @@ static struct clk mmc_ick = { .name = "mmc_ick", .parent = &l4_ck, .flags = CLOCK_IN_OMAP242X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1), .enable_bit = OMAP2420_EN_MMC_SHIFT, .recalc = &followparent_recalc, @@ -2130,6 +2232,7 @@ static struct clk mmc_fck = { .name = "mmc_fck", .parent = &func_96m_ck, .flags = CLOCK_IN_OMAP242X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_FCLKEN1), .enable_bit = OMAP2420_EN_MMC_SHIFT, .recalc = &followparent_recalc, @@ -2139,6 +2242,7 @@ static struct clk fac_ick = { .name = "fac_ick", .parent = &l4_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1), .enable_bit = OMAP24XX_EN_FAC_SHIFT, .recalc = &followparent_recalc, @@ -2148,6 +2252,7 @@ static struct clk fac_fck = { .name = "fac_fck", .parent = &func_12m_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_FCLKEN1), .enable_bit = OMAP24XX_EN_FAC_SHIFT, .recalc = &followparent_recalc, @@ -2157,6 +2262,7 @@ static struct clk eac_ick = { .name = "eac_ick", .parent = &l4_ck, .flags = CLOCK_IN_OMAP242X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1), .enable_bit = OMAP2420_EN_EAC_SHIFT, .recalc = &followparent_recalc, @@ -2166,6 +2272,7 @@ static struct clk eac_fck = { .name = "eac_fck", .parent = &func_96m_ck, .flags = CLOCK_IN_OMAP242X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_FCLKEN1), .enable_bit = OMAP2420_EN_EAC_SHIFT, .recalc = &followparent_recalc, @@ -2175,6 +2282,7 @@ static struct clk hdq_ick = { .name = "hdq_ick", .parent = &l4_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1), .enable_bit = OMAP24XX_EN_HDQ_SHIFT, .recalc = &followparent_recalc, @@ -2184,6 +2292,7 @@ static struct clk hdq_fck = { .name = "hdq_fck", .parent = &func_12m_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_FCLKEN1), .enable_bit = OMAP24XX_EN_HDQ_SHIFT, .recalc = &followparent_recalc, @@ -2194,6 +2303,7 @@ static struct clk i2c2_ick = { .id = 2, .parent = &l4_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1), .enable_bit = OMAP2420_EN_I2C2_SHIFT, .recalc = &followparent_recalc, @@ -2204,6 +2314,7 @@ static struct clk i2c2_fck = { .id = 2, .parent = &func_12m_ck, .flags = CLOCK_IN_OMAP242X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_FCLKEN1), .enable_bit = OMAP2420_EN_I2C2_SHIFT, .recalc = &followparent_recalc, @@ -2214,6 +2325,7 @@ static struct clk i2chs2_fck = { .id = 2, .parent = &func_96m_ck, .flags = CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, OMAP24XX_CM_FCLKEN2), .enable_bit = OMAP2430_EN_I2CHS2_SHIFT, .recalc = &followparent_recalc, @@ -2224,6 +2336,7 @@ static struct clk i2c1_ick = { .id = 1, .parent = &l4_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1), .enable_bit = OMAP2420_EN_I2C1_SHIFT, .recalc = &followparent_recalc, @@ -2234,6 +2347,7 @@ static struct clk i2c1_fck = { .id = 1, .parent = &func_12m_ck, .flags = CLOCK_IN_OMAP242X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_FCLKEN1), .enable_bit = OMAP2420_EN_I2C1_SHIFT, .recalc = &followparent_recalc, @@ -2244,6 +2358,7 @@ static struct clk i2chs1_fck = { .id = 1, .parent = &func_96m_ck, .flags = CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, OMAP24XX_CM_FCLKEN2), .enable_bit = OMAP2430_EN_I2CHS1_SHIFT, .recalc = &followparent_recalc, @@ -2252,7 +2367,9 @@ static struct clk i2chs1_fck = { static struct clk gpmc_fck = { .name = "gpmc_fck", .parent = &core_l3_ck, - .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X | ENABLE_ON_INIT, + .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X | + ENABLE_ON_INIT, + .clkdm_name = "core_l3_clkdm", .recalc = &followparent_recalc, }; @@ -2260,6 +2377,7 @@ static struct clk sdma_fck = { .name = "sdma_fck", .parent = &core_l3_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "core_l3_clkdm", .recalc = &followparent_recalc, }; @@ -2267,6 +2385,7 @@ static struct clk sdma_ick = { .name = "sdma_ick", .parent = &l4_ck, .flags = CLOCK_IN_OMAP242X | CLOCK_IN_OMAP243X, + .clkdm_name = "core_l3_clkdm", .recalc = &followparent_recalc, }; @@ -2274,6 +2393,7 @@ static struct clk vlynq_ick = { .name = "vlynq_ick", .parent = &core_l3_ck, .flags = CLOCK_IN_OMAP242X, + .clkdm_name = "core_l3_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1), .enable_bit = OMAP2420_EN_VLYNQ_SHIFT, .recalc = &followparent_recalc, @@ -2308,6 +2428,7 @@ static struct clk vlynq_fck = { .name = "vlynq_fck", .parent = &func_96m_ck, .flags = CLOCK_IN_OMAP242X | DELAYED_APP, + .clkdm_name = "core_l3_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_FCLKEN1), .enable_bit = OMAP2420_EN_VLYNQ_SHIFT, .init = &omap2_init_clksel_parent, @@ -2323,6 +2444,7 @@ static struct clk sdrc_ick = { .name = "sdrc_ick", .parent = &l4_ck, .flags = CLOCK_IN_OMAP243X | ENABLE_ON_INIT, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN3), .enable_bit = OMAP2430_EN_SDRC_SHIFT, .recalc = &followparent_recalc, @@ -2332,6 +2454,7 @@ static struct clk des_ick = { .name = "des_ick", .parent = &l4_ck, .flags = CLOCK_IN_OMAP243X | CLOCK_IN_OMAP242X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, OMAP24XX_CM_ICLKEN4), .enable_bit = OMAP24XX_EN_DES_SHIFT, .recalc = &followparent_recalc, @@ -2341,6 +2464,7 @@ static struct clk sha_ick = { .name = "sha_ick", .parent = &l4_ck, .flags = CLOCK_IN_OMAP243X | CLOCK_IN_OMAP242X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, OMAP24XX_CM_ICLKEN4), .enable_bit = OMAP24XX_EN_SHA_SHIFT, .recalc = &followparent_recalc, @@ -2350,6 +2474,7 @@ static struct clk rng_ick = { .name = "rng_ick", .parent = &l4_ck, .flags = CLOCK_IN_OMAP243X | CLOCK_IN_OMAP242X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, OMAP24XX_CM_ICLKEN4), .enable_bit = OMAP24XX_EN_RNG_SHIFT, .recalc = &followparent_recalc, @@ -2359,6 +2484,7 @@ static struct clk aes_ick = { .name = "aes_ick", .parent = &l4_ck, .flags = CLOCK_IN_OMAP243X | CLOCK_IN_OMAP242X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, OMAP24XX_CM_ICLKEN4), .enable_bit = OMAP24XX_EN_AES_SHIFT, .recalc = &followparent_recalc, @@ -2368,6 +2494,7 @@ static struct clk pka_ick = { .name = "pka_ick", .parent = &l4_ck, .flags = CLOCK_IN_OMAP243X | CLOCK_IN_OMAP242X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, OMAP24XX_CM_ICLKEN4), .enable_bit = OMAP24XX_EN_PKA_SHIFT, .recalc = &followparent_recalc, @@ -2377,6 +2504,7 @@ static struct clk usb_fck = { .name = "usb_fck", .parent = &func_48m_ck, .flags = CLOCK_IN_OMAP243X | CLOCK_IN_OMAP242X, + .clkdm_name = "core_l3_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, OMAP24XX_CM_FCLKEN2), .enable_bit = OMAP24XX_EN_USB_SHIFT, .recalc = &followparent_recalc, @@ -2386,6 +2514,7 @@ static struct clk usbhs_ick = { .name = "usbhs_ick", .parent = &core_l3_ck, .flags = CLOCK_IN_OMAP243X, + .clkdm_name = "core_l3_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN2), .enable_bit = OMAP2430_EN_USBHS_SHIFT, .recalc = &followparent_recalc, @@ -2396,6 +2525,7 @@ static struct clk mmchs1_ick = { .id = 1, .parent = &l4_ck, .flags = CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN2), .enable_bit = OMAP2430_EN_MMCHS1_SHIFT, .recalc = &followparent_recalc, @@ -2406,6 +2536,7 @@ static struct clk mmchs1_fck = { .id = 1, .parent = &func_96m_ck, .flags = CLOCK_IN_OMAP243X, + .clkdm_name = "core_l3_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, OMAP24XX_CM_FCLKEN2), .enable_bit = OMAP2430_EN_MMCHS1_SHIFT, .recalc = &followparent_recalc, @@ -2416,6 +2547,7 @@ static struct clk mmchs2_ick = { .id = 2, .parent = &l4_ck, .flags = CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN2), .enable_bit = OMAP2430_EN_MMCHS2_SHIFT, .recalc = &followparent_recalc, @@ -2435,6 +2567,7 @@ static struct clk gpio5_ick = { .name = "gpio5_ick", .parent = &l4_ck, .flags = CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN2), .enable_bit = OMAP2430_EN_GPIO5_SHIFT, .recalc = &followparent_recalc, @@ -2444,6 +2577,7 @@ static struct clk gpio5_fck = { .name = "gpio5_fck", .parent = &func_32k_ck, .flags = CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, OMAP24XX_CM_FCLKEN2), .enable_bit = OMAP2430_EN_GPIO5_SHIFT, .recalc = &followparent_recalc, @@ -2453,6 +2587,7 @@ static struct clk mdm_intc_ick = { .name = "mdm_intc_ick", .parent = &l4_ck, .flags = CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN2), .enable_bit = OMAP2430_EN_MDM_INTC_SHIFT, .recalc = &followparent_recalc, @@ -2463,6 +2598,7 @@ static struct clk mmchsdb1_fck = { .id = 1, .parent = &func_32k_ck, .flags = CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, OMAP24XX_CM_FCLKEN2), .enable_bit = OMAP2430_EN_MMCHSDB1_SHIFT, .recalc = &followparent_recalc, @@ -2473,6 +2609,7 @@ static struct clk mmchsdb2_fck = { .id = 2, .parent = &func_32k_ck, .flags = CLOCK_IN_OMAP243X, + .clkdm_name = "core_l4_clkdm", .enable_reg = OMAP_CM_REGADDR(CORE_MOD, OMAP24XX_CM_FCLKEN2), .enable_bit = OMAP2430_EN_MMCHSDB2_SHIFT, .recalc = &followparent_recalc, @@ -2551,7 +2688,6 @@ static struct clk *onchip_24xx_clks[] __initdata = { &usb_l4_ick, /* L4 domain clocks */ &l4_ck, /* used as both core_l4 and wu_l4 */ - &ssi_l4_ick, /* virtual meta-group clock */ &virt_prcm_set, /* general l4 interface ck, multi-parent functional clk */ diff --git a/arch/arm/plat-omap/include/mach/clock.h b/arch/arm/plat-omap/include/mach/clock.h index 92f7c72..7192985 100644 --- a/arch/arm/plat-omap/include/mach/clock.h +++ b/arch/arm/plat-omap/include/mach/clock.h @@ -15,6 +15,7 @@ struct module; struct clk; +struct clockdomain; #if defined(CONFIG_ARCH_OMAP2) || defined(CONFIG_ARCH_OMAP3) @@ -79,6 +80,8 @@ struct clk { u32 clksel_mask; const struct clksel *clksel; struct dpll_data *dpll_data; + const char *clkdm_name; + struct clockdomain *clkdm; #else __u8 rate_offset; __u8 src_offset; -- cgit v0.10.2 From 333943ba9e1716a3751af82f2dcc7620b83091ed Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Tue, 19 Aug 2008 11:08:45 +0300 Subject: ARM: OMAP2: Clockdomain: Integrate OMAP3 clocks with clockdomain code This patch integrates the OMAP3 clock tree with the clockdomain code. This patch: - marks OMAP34xx clocks with their corresponding clockdomain. - adds code to convert the clockdomain name to a clockdomain pointer in the struct clk during clk_register(). - modifies OMAP2 clock usecounting to call into the clockdomain code when clocks are enabled or disabled. Signed-off-by: Paul Walmsley Signed-off-by: Tony Lindgren diff --git a/arch/arm/mach-omap2/clock.c b/arch/arm/mach-omap2/clock.c index 1d891e4..aa9b373 100644 --- a/arch/arm/mach-omap2/clock.c +++ b/arch/arm/mach-omap2/clock.c @@ -26,6 +26,7 @@ #include #include +#include #include #include #include @@ -62,10 +63,36 @@ u8 cpu_mask; /*------------------------------------------------------------------------- - * Omap2 specific clock functions + * OMAP2/3 specific clock functions *-------------------------------------------------------------------------*/ /** + * omap2_init_clk_clkdm - look up a clockdomain name, store pointer in clk + * @clk: OMAP clock struct ptr to use + * + * Convert a clockdomain name stored in a struct clk 'clk' into a + * clockdomain pointer, and save it into the struct clk. Intended to be + * called during clk_register(). No return value. + */ +void omap2_init_clk_clkdm(struct clk *clk) +{ + struct clockdomain *clkdm; + + if (!clk->clkdm_name) + return; + + clkdm = clkdm_lookup(clk->clkdm_name); + if (clkdm) { + pr_debug("clock: associated clk %s to clkdm %s\n", + clk->name, clk->clkdm_name); + clk->clkdm = clkdm; + } else { + pr_debug("clock: could not associate clk %s to " + "clkdm %s\n", clk->name, clk->clkdm_name); + } +} + +/** * omap2_init_clksel_parent - set a clksel clk's parent field from the hardware * @clk: OMAP clock struct ptr to use * @@ -308,6 +335,9 @@ void omap2_clk_disable(struct clk *clk) _omap2_clk_disable(clk); if (likely((u32)clk->parent)) omap2_clk_disable(clk->parent); + if (clk->clkdm) + omap2_clkdm_clk_disable(clk->clkdm, clk); + } } @@ -324,11 +354,19 @@ int omap2_clk_enable(struct clk *clk) return ret; } + if (clk->clkdm) + omap2_clkdm_clk_enable(clk->clkdm, clk); + ret = _omap2_clk_enable(clk); - if (unlikely(ret != 0) && clk->parent) { - omap2_clk_disable(clk->parent); - clk->usecount--; + if (unlikely(ret != 0)) { + if (clk->clkdm) + omap2_clkdm_clk_disable(clk->clkdm, clk); + + if (clk->parent) { + omap2_clk_disable(clk->parent); + clk->usecount--; + } } } diff --git a/arch/arm/mach-omap2/clock.h b/arch/arm/mach-omap2/clock.h index 626e5fa..ea55f28 100644 --- a/arch/arm/mach-omap2/clock.h +++ b/arch/arm/mach-omap2/clock.h @@ -36,6 +36,7 @@ void omap2_clk_disable_unused(struct clk *clk); #endif void omap2_clksel_recalc(struct clk *clk); +void omap2_init_clk_clkdm(struct clk *clk); void omap2_init_clksel_parent(struct clk *clk); u32 omap2_clksel_get_divisor(struct clk *clk); u32 omap2_clksel_round_rate_div(struct clk *clk, unsigned long target_rate, diff --git a/arch/arm/mach-omap2/clock34xx.c b/arch/arm/mach-omap2/clock34xx.c index dff7a72f..0bf661d 100644 --- a/arch/arm/mach-omap2/clock34xx.c +++ b/arch/arm/mach-omap2/clock34xx.c @@ -489,8 +489,10 @@ int __init omap2_clk_init(void) for (clkp = onchip_34xx_clks; clkp < onchip_34xx_clks + ARRAY_SIZE(onchip_34xx_clks); clkp++) { - if ((*clkp)->flags & cpu_clkflg) + if ((*clkp)->flags & cpu_clkflg) { clk_register(*clkp); + omap2_init_clk_clkdm(*clkp); + } } /* REVISIT: Not yet ready for OMAP3 */ diff --git a/arch/arm/mach-omap2/clock34xx.h b/arch/arm/mach-omap2/clock34xx.h index ec66445..bb01e20 100644 --- a/arch/arm/mach-omap2/clock34xx.h +++ b/arch/arm/mach-omap2/clock34xx.h @@ -478,7 +478,7 @@ static struct clk dpll3_m2_ck = { }; static const struct clksel core_ck_clksel[] = { - { .parent = &sys_ck, .rates = dpll_bypass_rates }, + { .parent = &sys_ck, .rates = dpll_bypass_rates }, { .parent = &dpll3_m2_ck, .rates = dpll_locked_rates }, { .parent = NULL } }; @@ -495,7 +495,7 @@ static struct clk core_ck = { }; static const struct clksel dpll3_m2x2_ck_clksel[] = { - { .parent = &sys_ck, .rates = dpll_bypass_rates }, + { .parent = &sys_ck, .rates = dpll_bypass_rates }, { .parent = &dpll3_x2_ck, .rates = dpll_locked_rates }, { .parent = NULL } }; @@ -541,7 +541,7 @@ static struct clk dpll3_m3x2_ck = { }; static const struct clksel emu_core_alwon_ck_clksel[] = { - { .parent = &sys_ck, .rates = dpll_bypass_rates }, + { .parent = &sys_ck, .rates = dpll_bypass_rates }, { .parent = &dpll3_m3x2_ck, .rates = dpll_locked_rates }, { .parent = NULL } }; @@ -633,7 +633,7 @@ static struct clk dpll4_m2x2_ck = { }; static const struct clksel omap_96m_alwon_fck_clksel[] = { - { .parent = &sys_ck, .rates = dpll_bypass_rates }, + { .parent = &sys_ck, .rates = dpll_bypass_rates }, { .parent = &dpll4_m2x2_ck, .rates = dpll_locked_rates }, { .parent = NULL } }; @@ -659,7 +659,7 @@ static struct clk omap_96m_fck = { }; static const struct clksel cm_96m_fck_clksel[] = { - { .parent = &sys_ck, .rates = dpll_bypass_rates }, + { .parent = &sys_ck, .rates = dpll_bypass_rates }, { .parent = &dpll4_m2x2_ck, .rates = dpll_locked_rates }, { .parent = NULL } }; @@ -701,7 +701,7 @@ static struct clk dpll4_m3x2_ck = { }; static const struct clksel virt_omap_54m_fck_clksel[] = { - { .parent = &sys_ck, .rates = dpll_bypass_rates }, + { .parent = &sys_ck, .rates = dpll_bypass_rates }, { .parent = &dpll4_m3x2_ck, .rates = dpll_locked_rates }, { .parent = NULL } }; @@ -911,7 +911,7 @@ static struct clk dpll5_m2_ck = { }; static const struct clksel omap_120m_fck_clksel[] = { - { .parent = &sys_ck, .rates = dpll_bypass_rates }, + { .parent = &sys_ck, .rates = dpll_bypass_rates }, { .parent = &dpll5_m2_ck, .rates = dpll_locked_rates }, { .parent = NULL } }; @@ -919,13 +919,13 @@ static const struct clksel omap_120m_fck_clksel[] = { static struct clk omap_120m_fck = { .name = "omap_120m_fck", .parent = &dpll5_m2_ck, - .init = &omap2_init_clksel_parent, - .clksel_reg = OMAP_CM_REGADDR(PLL_MOD, CM_IDLEST2), - .clksel_mask = OMAP3430ES2_ST_PERIPH2_CLK_MASK, - .clksel = omap_120m_fck_clksel, + .init = &omap2_init_clksel_parent, + .clksel_reg = OMAP_CM_REGADDR(PLL_MOD, CM_IDLEST2), + .clksel_mask = OMAP3430ES2_ST_PERIPH2_CLK_MASK, + .clksel = omap_120m_fck_clksel, .flags = CLOCK_IN_OMAP3430ES2 | RATE_PROPAGATES | PARENT_CONTROLS_CLOCK, - .recalc = &omap2_clksel_recalc, + .recalc = &omap2_clksel_recalc, }; /* CM EXTERNAL CLOCK OUTPUTS */ @@ -1034,7 +1034,7 @@ static struct clk dpll1_fck = { * called 'dpll1_fck' */ static const struct clksel mpu_clksel[] = { - { .parent = &dpll1_fck, .rates = dpll_bypass_rates }, + { .parent = &dpll1_fck, .rates = dpll_bypass_rates }, { .parent = &dpll1_x2m2_ck, .rates = dpll_locked_rates }, { .parent = NULL } }; @@ -1048,6 +1048,7 @@ static struct clk mpu_ck = { .clksel = mpu_clksel, .flags = CLOCK_IN_OMAP343X | RATE_PROPAGATES | PARENT_CONTROLS_CLOCK, + .clkdm_name = "mpu_clkdm", .recalc = &omap2_clksel_recalc, }; @@ -1075,6 +1076,8 @@ static struct clk arm_fck = { .recalc = &omap2_clksel_recalc, }; +/* XXX What about neon_clkdm ? */ + /* * REVISIT: This clock is never specifically defined in the 3430 TRM, * although it is referenced - so this is a guess @@ -1107,7 +1110,7 @@ static struct clk dpll2_fck = { */ static const struct clksel iva2_clksel[] = { - { .parent = &dpll2_fck, .rates = dpll_bypass_rates }, + { .parent = &dpll2_fck, .rates = dpll_bypass_rates }, { .parent = &dpll2_m2_ck, .rates = dpll_locked_rates }, { .parent = NULL } }; @@ -1123,6 +1126,7 @@ static struct clk iva2_ck = { .clksel_mask = OMAP3430_ST_IVA2_CLK_MASK, .clksel = iva2_clksel, .flags = CLOCK_IN_OMAP343X | RATE_PROPAGATES, + .clkdm_name = "iva2_clkdm", .recalc = &omap2_clksel_recalc, }; @@ -1137,6 +1141,7 @@ static struct clk l3_ick = { .clksel = div2_core_clksel, .flags = CLOCK_IN_OMAP343X | RATE_PROPAGATES | PARENT_CONTROLS_CLOCK, + .clkdm_name = "core_l3_clkdm", .recalc = &omap2_clksel_recalc, }; @@ -1154,6 +1159,7 @@ static struct clk l4_ick = { .clksel = div2_l3_clksel, .flags = CLOCK_IN_OMAP343X | RATE_PROPAGATES | PARENT_CONTROLS_CLOCK, + .clkdm_name = "core_l4_clkdm", .recalc = &omap2_clksel_recalc, }; @@ -1193,33 +1199,40 @@ static struct clk gfx_l3_fck = { .clksel_mask = OMAP_CLKSEL_GFX_MASK, .clksel = gfx_l3_clksel, .flags = CLOCK_IN_OMAP3430ES1 | RATE_PROPAGATES, + .clkdm_name = "gfx_3430es1_clkdm", .recalc = &omap2_clksel_recalc, }; static struct clk gfx_l3_ick = { .name = "gfx_l3_ick", .parent = &l3_ick, + .init = &omap2_init_clk_clkdm, .enable_reg = OMAP_CM_REGADDR(GFX_MOD, CM_ICLKEN), .enable_bit = OMAP_EN_GFX_SHIFT, .flags = CLOCK_IN_OMAP3430ES1, + .clkdm_name = "gfx_3430es1_clkdm", .recalc = &followparent_recalc, }; static struct clk gfx_cg1_ck = { .name = "gfx_cg1_ck", .parent = &gfx_l3_fck, /* REVISIT: correct? */ + .init = &omap2_init_clk_clkdm, .enable_reg = OMAP_CM_REGADDR(GFX_MOD, CM_FCLKEN), .enable_bit = OMAP3430ES1_EN_2D_SHIFT, .flags = CLOCK_IN_OMAP3430ES1, + .clkdm_name = "gfx_3430es1_clkdm", .recalc = &followparent_recalc, }; static struct clk gfx_cg2_ck = { .name = "gfx_cg2_ck", .parent = &gfx_l3_fck, /* REVISIT: correct? */ + .init = &omap2_init_clk_clkdm, .enable_reg = OMAP_CM_REGADDR(GFX_MOD, CM_FCLKEN), .enable_bit = OMAP3430ES1_EN_3D_SHIFT, .flags = CLOCK_IN_OMAP3430ES1, + .clkdm_name = "gfx_3430es1_clkdm", .recalc = &followparent_recalc, }; @@ -1252,15 +1265,18 @@ static struct clk sgx_fck = { .clksel_mask = OMAP3430ES2_CLKSEL_SGX_MASK, .clksel = sgx_clksel, .flags = CLOCK_IN_OMAP3430ES2, + .clkdm_name = "sgx_clkdm", .recalc = &omap2_clksel_recalc, }; static struct clk sgx_ick = { .name = "sgx_ick", .parent = &l3_ick, + .init = &omap2_init_clk_clkdm, .enable_reg = OMAP_CM_REGADDR(OMAP3430ES2_SGX_MOD, CM_ICLKEN), .enable_bit = OMAP3430ES2_EN_SGX_SHIFT, .flags = CLOCK_IN_OMAP3430ES2, + .clkdm_name = "sgx_clkdm", .recalc = &followparent_recalc, }; @@ -1269,9 +1285,11 @@ static struct clk sgx_ick = { static struct clk d2d_26m_fck = { .name = "d2d_26m_fck", .parent = &sys_ck, + .init = &omap2_init_clk_clkdm, .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_FCLKEN1), .enable_bit = OMAP3430ES1_EN_D2D_SHIFT, .flags = CLOCK_IN_OMAP3430ES1, + .clkdm_name = "d2d_clkdm", .recalc = &followparent_recalc, }; @@ -1291,6 +1309,7 @@ static struct clk gpt10_fck = { .clksel_mask = OMAP3430_CLKSEL_GPT10_MASK, .clksel = omap343x_gpt_clksel, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "core_l4_clkdm", .recalc = &omap2_clksel_recalc, }; @@ -1304,6 +1323,7 @@ static struct clk gpt11_fck = { .clksel_mask = OMAP3430_CLKSEL_GPT11_MASK, .clksel = omap343x_gpt_clksel, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "core_l4_clkdm", .recalc = &omap2_clksel_recalc, }; @@ -1341,6 +1361,7 @@ static struct clk core_96m_fck = { .parent = &omap_96m_fck, .flags = CLOCK_IN_OMAP343X | RATE_PROPAGATES | PARENT_CONTROLS_CLOCK, + .clkdm_name = "core_l4_clkdm", .recalc = &followparent_recalc, }; @@ -1351,6 +1372,7 @@ static struct clk mmchs3_fck = { .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_FCLKEN1), .enable_bit = OMAP3430ES2_EN_MMC3_SHIFT, .flags = CLOCK_IN_OMAP3430ES2, + .clkdm_name = "core_l4_clkdm", .recalc = &followparent_recalc, }; @@ -1361,6 +1383,7 @@ static struct clk mmchs2_fck = { .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_FCLKEN1), .enable_bit = OMAP3430_EN_MMC2_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "core_l4_clkdm", .recalc = &followparent_recalc, }; @@ -1370,6 +1393,7 @@ static struct clk mspro_fck = { .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_FCLKEN1), .enable_bit = OMAP3430_EN_MSPRO_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "core_l4_clkdm", .recalc = &followparent_recalc, }; @@ -1380,6 +1404,7 @@ static struct clk mmchs1_fck = { .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_FCLKEN1), .enable_bit = OMAP3430_EN_MMC1_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "core_l4_clkdm", .recalc = &followparent_recalc, }; @@ -1390,16 +1415,18 @@ static struct clk i2c3_fck = { .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_FCLKEN1), .enable_bit = OMAP3430_EN_I2C3_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "core_l4_clkdm", .recalc = &followparent_recalc, }; static struct clk i2c2_fck = { .name = "i2c_fck", - .id = 2, + .id = 2, .parent = &core_96m_fck, .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_FCLKEN1), .enable_bit = OMAP3430_EN_I2C2_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "core_l4_clkdm", .recalc = &followparent_recalc, }; @@ -1410,6 +1437,7 @@ static struct clk i2c1_fck = { .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_FCLKEN1), .enable_bit = OMAP3430_EN_I2C1_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "core_l4_clkdm", .recalc = &followparent_recalc, }; @@ -1443,6 +1471,7 @@ static struct clk mcbsp5_fck = { .clksel_mask = OMAP2_MCBSP5_CLKS_MASK, .clksel = mcbsp_15_clksel, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "core_l4_clkdm", .recalc = &omap2_clksel_recalc, }; @@ -1456,6 +1485,7 @@ static struct clk mcbsp1_fck = { .clksel_mask = OMAP2_MCBSP1_CLKS_MASK, .clksel = mcbsp_15_clksel, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "core_l4_clkdm", .recalc = &omap2_clksel_recalc, }; @@ -1466,6 +1496,7 @@ static struct clk core_48m_fck = { .parent = &omap_48m_fck, .flags = CLOCK_IN_OMAP343X | RATE_PROPAGATES | PARENT_CONTROLS_CLOCK, + .clkdm_name = "core_l4_clkdm", .recalc = &followparent_recalc, }; @@ -1543,6 +1574,7 @@ static struct clk core_12m_fck = { .parent = &omap_12m_fck, .flags = CLOCK_IN_OMAP343X | RATE_PROPAGATES | PARENT_CONTROLS_CLOCK, + .clkdm_name = "core_l4_clkdm", .recalc = &followparent_recalc, }; @@ -1581,6 +1613,7 @@ static struct clk ssi_ssr_fck = { .clksel_mask = OMAP3430_CLKSEL_SSI_MASK, .clksel = ssi_ssr_clksel, .flags = CLOCK_IN_OMAP343X | RATE_PROPAGATES, + .clkdm_name = "core_l4_clkdm", .recalc = &omap2_clksel_recalc, }; @@ -1596,11 +1629,17 @@ static struct clk ssi_sst_fck = { /* CORE_L3_ICK based clocks */ +/* + * XXX must add clk_enable/clk_disable for these if standard code won't + * handle it + */ static struct clk core_l3_ick = { .name = "core_l3_ick", .parent = &l3_ick, + .init = &omap2_init_clk_clkdm, .flags = CLOCK_IN_OMAP343X | RATE_PROPAGATES | PARENT_CONTROLS_CLOCK, + .clkdm_name = "core_l3_clkdm", .recalc = &followparent_recalc, }; @@ -1610,6 +1649,7 @@ static struct clk hsotgusb_ick = { .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1), .enable_bit = OMAP3430_EN_HSOTGUSB_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "core_l3_clkdm", .recalc = &followparent_recalc, }; @@ -1619,6 +1659,7 @@ static struct clk sdrc_ick = { .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1), .enable_bit = OMAP3430_EN_SDRC_SHIFT, .flags = CLOCK_IN_OMAP343X | ENABLE_ON_INIT, + .clkdm_name = "core_l3_clkdm", .recalc = &followparent_recalc, }; @@ -1627,6 +1668,7 @@ static struct clk gpmc_fck = { .parent = &core_l3_ick, .flags = CLOCK_IN_OMAP343X | PARENT_CONTROLS_CLOCK | ENABLE_ON_INIT, + .clkdm_name = "core_l3_clkdm", .recalc = &followparent_recalc, }; @@ -1654,8 +1696,10 @@ static struct clk pka_ick = { static struct clk core_l4_ick = { .name = "core_l4_ick", .parent = &l4_ick, + .init = &omap2_init_clk_clkdm, .flags = CLOCK_IN_OMAP343X | RATE_PROPAGATES | PARENT_CONTROLS_CLOCK, + .clkdm_name = "core_l4_clkdm", .recalc = &followparent_recalc, }; @@ -1665,6 +1709,7 @@ static struct clk usbtll_ick = { .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN3), .enable_bit = OMAP3430ES2_EN_USBTLL_SHIFT, .flags = CLOCK_IN_OMAP3430ES2, + .clkdm_name = "core_l4_clkdm", .recalc = &followparent_recalc, }; @@ -1675,6 +1720,7 @@ static struct clk mmchs3_ick = { .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1), .enable_bit = OMAP3430ES2_EN_MMC3_SHIFT, .flags = CLOCK_IN_OMAP3430ES2, + .clkdm_name = "core_l4_clkdm", .recalc = &followparent_recalc, }; @@ -1685,6 +1731,7 @@ static struct clk icr_ick = { .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1), .enable_bit = OMAP3430_EN_ICR_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "core_l4_clkdm", .recalc = &followparent_recalc, }; @@ -1694,6 +1741,7 @@ static struct clk aes2_ick = { .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1), .enable_bit = OMAP3430_EN_AES2_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "core_l4_clkdm", .recalc = &followparent_recalc, }; @@ -1703,6 +1751,7 @@ static struct clk sha12_ick = { .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1), .enable_bit = OMAP3430_EN_SHA12_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "core_l4_clkdm", .recalc = &followparent_recalc, }; @@ -1712,6 +1761,7 @@ static struct clk des2_ick = { .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1), .enable_bit = OMAP3430_EN_DES2_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "core_l4_clkdm", .recalc = &followparent_recalc, }; @@ -1722,6 +1772,7 @@ static struct clk mmchs2_ick = { .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1), .enable_bit = OMAP3430_EN_MMC2_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "core_l4_clkdm", .recalc = &followparent_recalc, }; @@ -1732,6 +1783,7 @@ static struct clk mmchs1_ick = { .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1), .enable_bit = OMAP3430_EN_MMC1_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "core_l4_clkdm", .recalc = &followparent_recalc, }; @@ -1741,6 +1793,7 @@ static struct clk mspro_ick = { .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1), .enable_bit = OMAP3430_EN_MSPRO_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "core_l4_clkdm", .recalc = &followparent_recalc, }; @@ -1750,6 +1803,7 @@ static struct clk hdq_ick = { .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1), .enable_bit = OMAP3430_EN_HDQ_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "core_l4_clkdm", .recalc = &followparent_recalc, }; @@ -1760,6 +1814,7 @@ static struct clk mcspi4_ick = { .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1), .enable_bit = OMAP3430_EN_MCSPI4_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "core_l4_clkdm", .recalc = &followparent_recalc, }; @@ -1770,6 +1825,7 @@ static struct clk mcspi3_ick = { .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1), .enable_bit = OMAP3430_EN_MCSPI3_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "core_l4_clkdm", .recalc = &followparent_recalc, }; @@ -1780,6 +1836,7 @@ static struct clk mcspi2_ick = { .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1), .enable_bit = OMAP3430_EN_MCSPI2_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "core_l4_clkdm", .recalc = &followparent_recalc, }; @@ -1790,6 +1847,7 @@ static struct clk mcspi1_ick = { .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1), .enable_bit = OMAP3430_EN_MCSPI1_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "core_l4_clkdm", .recalc = &followparent_recalc, }; @@ -1800,6 +1858,7 @@ static struct clk i2c3_ick = { .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1), .enable_bit = OMAP3430_EN_I2C3_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "core_l4_clkdm", .recalc = &followparent_recalc, }; @@ -1810,6 +1869,7 @@ static struct clk i2c2_ick = { .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1), .enable_bit = OMAP3430_EN_I2C2_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "core_l4_clkdm", .recalc = &followparent_recalc, }; @@ -1820,6 +1880,7 @@ static struct clk i2c1_ick = { .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1), .enable_bit = OMAP3430_EN_I2C1_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "core_l4_clkdm", .recalc = &followparent_recalc, }; @@ -1829,6 +1890,7 @@ static struct clk uart2_ick = { .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1), .enable_bit = OMAP3430_EN_UART2_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "core_l4_clkdm", .recalc = &followparent_recalc, }; @@ -1838,6 +1900,7 @@ static struct clk uart1_ick = { .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1), .enable_bit = OMAP3430_EN_UART1_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "core_l4_clkdm", .recalc = &followparent_recalc, }; @@ -1847,6 +1910,7 @@ static struct clk gpt11_ick = { .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1), .enable_bit = OMAP3430_EN_GPT11_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "core_l4_clkdm", .recalc = &followparent_recalc, }; @@ -1856,6 +1920,7 @@ static struct clk gpt10_ick = { .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1), .enable_bit = OMAP3430_EN_GPT10_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "core_l4_clkdm", .recalc = &followparent_recalc, }; @@ -1866,6 +1931,7 @@ static struct clk mcbsp5_ick = { .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1), .enable_bit = OMAP3430_EN_MCBSP5_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "core_l4_clkdm", .recalc = &followparent_recalc, }; @@ -1876,6 +1942,7 @@ static struct clk mcbsp1_ick = { .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1), .enable_bit = OMAP3430_EN_MCBSP1_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "core_l4_clkdm", .recalc = &followparent_recalc, }; @@ -1885,6 +1952,7 @@ static struct clk fac_ick = { .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1), .enable_bit = OMAP3430ES1_EN_FAC_SHIFT, .flags = CLOCK_IN_OMAP3430ES1, + .clkdm_name = "core_l4_clkdm", .recalc = &followparent_recalc, }; @@ -1894,6 +1962,7 @@ static struct clk mailboxes_ick = { .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1), .enable_bit = OMAP3430_EN_MAILBOXES_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "core_l4_clkdm", .recalc = &followparent_recalc, }; @@ -1913,6 +1982,7 @@ static struct clk ssi_l4_ick = { .parent = &l4_ick, .flags = CLOCK_IN_OMAP343X | RATE_PROPAGATES | PARENT_CONTROLS_CLOCK, + .clkdm_name = "core_l4_clkdm", .recalc = &followparent_recalc, }; @@ -1922,6 +1992,7 @@ static struct clk ssi_ick = { .enable_reg = OMAP_CM_REGADDR(CORE_MOD, CM_ICLKEN1), .enable_bit = OMAP3430_EN_SSI_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "core_l4_clkdm", .recalc = &followparent_recalc, }; @@ -1996,7 +2067,7 @@ static struct clk des1_ick = { /* DSS */ static const struct clksel dss1_alwon_fck_clksel[] = { - { .parent = &sys_ck, .rates = dpll_bypass_rates }, + { .parent = &sys_ck, .rates = dpll_bypass_rates }, { .parent = &dpll4_m4x2_ck, .rates = dpll_locked_rates }, { .parent = NULL } }; @@ -2011,33 +2082,40 @@ static struct clk dss1_alwon_fck = { .clksel_mask = OMAP3430_ST_PERIPH_CLK_MASK, .clksel = dss1_alwon_fck_clksel, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "dss_clkdm", .recalc = &omap2_clksel_recalc, }; static struct clk dss_tv_fck = { .name = "dss_tv_fck", .parent = &omap_54m_fck, + .init = &omap2_init_clk_clkdm, .enable_reg = OMAP_CM_REGADDR(OMAP3430_DSS_MOD, CM_FCLKEN), .enable_bit = OMAP3430_EN_TV_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "dss_clkdm", .recalc = &followparent_recalc, }; static struct clk dss_96m_fck = { .name = "dss_96m_fck", .parent = &omap_96m_fck, + .init = &omap2_init_clk_clkdm, .enable_reg = OMAP_CM_REGADDR(OMAP3430_DSS_MOD, CM_FCLKEN), .enable_bit = OMAP3430_EN_TV_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "dss_clkdm", .recalc = &followparent_recalc, }; static struct clk dss2_alwon_fck = { .name = "dss2_alwon_fck", .parent = &sys_ck, + .init = &omap2_init_clk_clkdm, .enable_reg = OMAP_CM_REGADDR(OMAP3430_DSS_MOD, CM_FCLKEN), .enable_bit = OMAP3430_EN_DSS2_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "dss_clkdm", .recalc = &followparent_recalc, }; @@ -2045,16 +2123,18 @@ static struct clk dss_ick = { /* Handles both L3 and L4 clocks */ .name = "dss_ick", .parent = &l4_ick, + .init = &omap2_init_clk_clkdm, .enable_reg = OMAP_CM_REGADDR(OMAP3430_DSS_MOD, CM_ICLKEN), .enable_bit = OMAP3430_CM_ICLKEN_DSS_EN_DSS_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "dss_clkdm", .recalc = &followparent_recalc, }; /* CAM */ static const struct clksel cam_mclk_clksel[] = { - { .parent = &sys_ck, .rates = dpll_bypass_rates }, + { .parent = &sys_ck, .rates = dpll_bypass_rates }, { .parent = &dpll4_m5x2_ck, .rates = dpll_locked_rates }, { .parent = NULL } }; @@ -2069,24 +2149,29 @@ static struct clk cam_mclk = { .enable_reg = OMAP_CM_REGADDR(OMAP3430_CAM_MOD, CM_FCLKEN), .enable_bit = OMAP3430_EN_CAM_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "cam_clkdm", .recalc = &omap2_clksel_recalc, }; static struct clk cam_l3_ick = { .name = "cam_l3_ick", .parent = &l3_ick, + .init = &omap2_init_clk_clkdm, .enable_reg = OMAP_CM_REGADDR(OMAP3430_CAM_MOD, CM_ICLKEN), .enable_bit = OMAP3430_EN_CAM_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "cam_clkdm", .recalc = &followparent_recalc, }; static struct clk cam_l4_ick = { .name = "cam_l4_ick", .parent = &l4_ick, + .init = &omap2_init_clk_clkdm, .enable_reg = OMAP_CM_REGADDR(OMAP3430_CAM_MOD, CM_ICLKEN), .enable_bit = OMAP3430_EN_CAM_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "cam_clkdm", .recalc = &followparent_recalc, }; @@ -2095,45 +2180,55 @@ static struct clk cam_l4_ick = { static struct clk usbhost_120m_fck = { .name = "usbhost_120m_fck", .parent = &omap_120m_fck, + .init = &omap2_init_clk_clkdm, .enable_reg = OMAP_CM_REGADDR(OMAP3430ES2_USBHOST_MOD, CM_FCLKEN), .enable_bit = OMAP3430ES2_EN_USBHOST2_SHIFT, .flags = CLOCK_IN_OMAP3430ES2, + .clkdm_name = "usbhost_clkdm", .recalc = &followparent_recalc, }; static struct clk usbhost_48m_fck = { .name = "usbhost_48m_fck", .parent = &omap_48m_fck, + .init = &omap2_init_clk_clkdm, .enable_reg = OMAP_CM_REGADDR(OMAP3430ES2_USBHOST_MOD, CM_FCLKEN), .enable_bit = OMAP3430ES2_EN_USBHOST1_SHIFT, .flags = CLOCK_IN_OMAP3430ES2, + .clkdm_name = "usbhost_clkdm", .recalc = &followparent_recalc, }; static struct clk usbhost_l3_ick = { .name = "usbhost_l3_ick", .parent = &l3_ick, + .init = &omap2_init_clk_clkdm, .enable_reg = OMAP_CM_REGADDR(OMAP3430ES2_USBHOST_MOD, CM_ICLKEN), .enable_bit = OMAP3430ES2_EN_USBHOST_SHIFT, .flags = CLOCK_IN_OMAP3430ES2, + .clkdm_name = "usbhost_clkdm", .recalc = &followparent_recalc, }; static struct clk usbhost_l4_ick = { .name = "usbhost_l4_ick", .parent = &l4_ick, + .init = &omap2_init_clk_clkdm, .enable_reg = OMAP_CM_REGADDR(OMAP3430ES2_USBHOST_MOD, CM_ICLKEN), .enable_bit = OMAP3430ES2_EN_USBHOST_SHIFT, .flags = CLOCK_IN_OMAP3430ES2, + .clkdm_name = "usbhost_clkdm", .recalc = &followparent_recalc, }; static struct clk usbhost_sar_fck = { .name = "usbhost_sar_fck", .parent = &osc_sys_ck, + .init = &omap2_init_clk_clkdm, .enable_reg = OMAP_PRM_REGADDR(OMAP3430ES2_USBHOST_MOD, PM_PWSTCTRL), .enable_bit = OMAP3430ES2_SAVEANDRESTORE_SHIFT, .flags = CLOCK_IN_OMAP3430ES2, + .clkdm_name = "usbhost_clkdm", .recalc = &followparent_recalc, }; @@ -2175,6 +2270,7 @@ static struct clk usim_fck = { .recalc = &omap2_clksel_recalc, }; +/* XXX should gpt1's clksel have wkup_32k_fck as the 32k opt? */ static struct clk gpt1_fck = { .name = "gpt1_fck", .init = &omap2_init_clksel_parent, @@ -2184,13 +2280,16 @@ static struct clk gpt1_fck = { .clksel_mask = OMAP3430_CLKSEL_GPT1_MASK, .clksel = omap343x_gpt_clksel, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "wkup_clkdm", .recalc = &omap2_clksel_recalc, }; static struct clk wkup_32k_fck = { .name = "wkup_32k_fck", + .init = &omap2_init_clk_clkdm, .parent = &omap_32k_fck, .flags = CLOCK_IN_OMAP343X | RATE_PROPAGATES | ALWAYS_ENABLED, + .clkdm_name = "wkup_clkdm", .recalc = &followparent_recalc, }; @@ -2200,6 +2299,7 @@ static struct clk gpio1_fck = { .enable_reg = OMAP_CM_REGADDR(WKUP_MOD, CM_FCLKEN), .enable_bit = OMAP3430_EN_GPIO1_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "wkup_clkdm", .recalc = &followparent_recalc, }; @@ -2209,6 +2309,7 @@ static struct clk wdt2_fck = { .enable_reg = OMAP_CM_REGADDR(WKUP_MOD, CM_FCLKEN), .enable_bit = OMAP3430_EN_WDT2_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "wkup_clkdm", .recalc = &followparent_recalc, }; @@ -2216,6 +2317,7 @@ static struct clk wkup_l4_ick = { .name = "wkup_l4_ick", .parent = &sys_ck, .flags = CLOCK_IN_OMAP343X | RATE_PROPAGATES | ALWAYS_ENABLED, + .clkdm_name = "wkup_clkdm", .recalc = &followparent_recalc, }; @@ -2227,6 +2329,7 @@ static struct clk usim_ick = { .enable_reg = OMAP_CM_REGADDR(WKUP_MOD, CM_ICLKEN), .enable_bit = OMAP3430ES2_EN_USIMOCP_SHIFT, .flags = CLOCK_IN_OMAP3430ES2, + .clkdm_name = "wkup_clkdm", .recalc = &followparent_recalc, }; @@ -2236,6 +2339,7 @@ static struct clk wdt2_ick = { .enable_reg = OMAP_CM_REGADDR(WKUP_MOD, CM_ICLKEN), .enable_bit = OMAP3430_EN_WDT2_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "wkup_clkdm", .recalc = &followparent_recalc, }; @@ -2245,6 +2349,7 @@ static struct clk wdt1_ick = { .enable_reg = OMAP_CM_REGADDR(WKUP_MOD, CM_ICLKEN), .enable_bit = OMAP3430_EN_WDT1_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "wkup_clkdm", .recalc = &followparent_recalc, }; @@ -2254,6 +2359,7 @@ static struct clk gpio1_ick = { .enable_reg = OMAP_CM_REGADDR(WKUP_MOD, CM_ICLKEN), .enable_bit = OMAP3430_EN_GPIO1_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "wkup_clkdm", .recalc = &followparent_recalc, }; @@ -2263,15 +2369,18 @@ static struct clk omap_32ksync_ick = { .enable_reg = OMAP_CM_REGADDR(WKUP_MOD, CM_ICLKEN), .enable_bit = OMAP3430_EN_32KSYNC_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "wkup_clkdm", .recalc = &followparent_recalc, }; +/* XXX This clock no longer exists in 3430 TRM rev F */ static struct clk gpt12_ick = { .name = "gpt12_ick", .parent = &wkup_l4_ick, .enable_reg = OMAP_CM_REGADDR(WKUP_MOD, CM_ICLKEN), .enable_bit = OMAP3430_EN_GPT12_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "wkup_clkdm", .recalc = &followparent_recalc, }; @@ -2281,6 +2390,7 @@ static struct clk gpt1_ick = { .enable_reg = OMAP_CM_REGADDR(WKUP_MOD, CM_ICLKEN), .enable_bit = OMAP3430_EN_GPT1_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "wkup_clkdm", .recalc = &followparent_recalc, }; @@ -2291,16 +2401,20 @@ static struct clk gpt1_ick = { static struct clk per_96m_fck = { .name = "per_96m_fck", .parent = &omap_96m_alwon_fck, + .init = &omap2_init_clk_clkdm, .flags = CLOCK_IN_OMAP343X | RATE_PROPAGATES | PARENT_CONTROLS_CLOCK, + .clkdm_name = "per_clkdm", .recalc = &followparent_recalc, }; static struct clk per_48m_fck = { .name = "per_48m_fck", .parent = &omap_48m_fck, + .init = &omap2_init_clk_clkdm, .flags = CLOCK_IN_OMAP343X | RATE_PROPAGATES | PARENT_CONTROLS_CLOCK, + .clkdm_name = "per_clkdm", .recalc = &followparent_recalc, }; @@ -2310,6 +2424,7 @@ static struct clk uart3_fck = { .enable_reg = OMAP_CM_REGADDR(OMAP3430_PER_MOD, CM_FCLKEN), .enable_bit = OMAP3430_EN_UART3_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "per_clkdm", .recalc = &followparent_recalc, }; @@ -2322,6 +2437,7 @@ static struct clk gpt2_fck = { .clksel_mask = OMAP3430_CLKSEL_GPT2_MASK, .clksel = omap343x_gpt_clksel, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "per_clkdm", .recalc = &omap2_clksel_recalc, }; @@ -2334,6 +2450,7 @@ static struct clk gpt3_fck = { .clksel_mask = OMAP3430_CLKSEL_GPT3_MASK, .clksel = omap343x_gpt_clksel, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "per_clkdm", .recalc = &omap2_clksel_recalc, }; @@ -2346,6 +2463,7 @@ static struct clk gpt4_fck = { .clksel_mask = OMAP3430_CLKSEL_GPT4_MASK, .clksel = omap343x_gpt_clksel, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "per_clkdm", .recalc = &omap2_clksel_recalc, }; @@ -2358,6 +2476,7 @@ static struct clk gpt5_fck = { .clksel_mask = OMAP3430_CLKSEL_GPT5_MASK, .clksel = omap343x_gpt_clksel, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "per_clkdm", .recalc = &omap2_clksel_recalc, }; @@ -2370,6 +2489,7 @@ static struct clk gpt6_fck = { .clksel_mask = OMAP3430_CLKSEL_GPT6_MASK, .clksel = omap343x_gpt_clksel, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "per_clkdm", .recalc = &omap2_clksel_recalc, }; @@ -2382,6 +2502,7 @@ static struct clk gpt7_fck = { .clksel_mask = OMAP3430_CLKSEL_GPT7_MASK, .clksel = omap343x_gpt_clksel, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "per_clkdm", .recalc = &omap2_clksel_recalc, }; @@ -2394,6 +2515,7 @@ static struct clk gpt8_fck = { .clksel_mask = OMAP3430_CLKSEL_GPT8_MASK, .clksel = omap343x_gpt_clksel, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "per_clkdm", .recalc = &omap2_clksel_recalc, }; @@ -2406,12 +2528,14 @@ static struct clk gpt9_fck = { .clksel_mask = OMAP3430_CLKSEL_GPT9_MASK, .clksel = omap343x_gpt_clksel, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "per_clkdm", .recalc = &omap2_clksel_recalc, }; static struct clk per_32k_alwon_fck = { .name = "per_32k_alwon_fck", .parent = &omap_32k_fck, + .clkdm_name = "per_clkdm", .flags = CLOCK_IN_OMAP343X | RATE_PROPAGATES | ALWAYS_ENABLED, .recalc = &followparent_recalc, }; @@ -2422,6 +2546,7 @@ static struct clk gpio6_fck = { .enable_reg = OMAP_CM_REGADDR(OMAP3430_PER_MOD, CM_FCLKEN), .enable_bit = OMAP3430_EN_GPIO6_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "per_clkdm", .recalc = &followparent_recalc, }; @@ -2431,6 +2556,7 @@ static struct clk gpio5_fck = { .enable_reg = OMAP_CM_REGADDR(OMAP3430_PER_MOD, CM_FCLKEN), .enable_bit = OMAP3430_EN_GPIO5_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "per_clkdm", .recalc = &followparent_recalc, }; @@ -2440,6 +2566,7 @@ static struct clk gpio4_fck = { .enable_reg = OMAP_CM_REGADDR(OMAP3430_PER_MOD, CM_FCLKEN), .enable_bit = OMAP3430_EN_GPIO4_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "per_clkdm", .recalc = &followparent_recalc, }; @@ -2449,6 +2576,7 @@ static struct clk gpio3_fck = { .enable_reg = OMAP_CM_REGADDR(OMAP3430_PER_MOD, CM_FCLKEN), .enable_bit = OMAP3430_EN_GPIO3_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "per_clkdm", .recalc = &followparent_recalc, }; @@ -2458,6 +2586,7 @@ static struct clk gpio2_fck = { .enable_reg = OMAP_CM_REGADDR(OMAP3430_PER_MOD, CM_FCLKEN), .enable_bit = OMAP3430_EN_GPIO2_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "per_clkdm", .recalc = &followparent_recalc, }; @@ -2467,6 +2596,7 @@ static struct clk wdt3_fck = { .enable_reg = OMAP_CM_REGADDR(OMAP3430_PER_MOD, CM_FCLKEN), .enable_bit = OMAP3430_EN_WDT3_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "per_clkdm", .recalc = &followparent_recalc, }; @@ -2475,6 +2605,7 @@ static struct clk per_l4_ick = { .parent = &l4_ick, .flags = CLOCK_IN_OMAP343X | RATE_PROPAGATES | PARENT_CONTROLS_CLOCK, + .clkdm_name = "per_clkdm", .recalc = &followparent_recalc, }; @@ -2484,6 +2615,7 @@ static struct clk gpio6_ick = { .enable_reg = OMAP_CM_REGADDR(OMAP3430_PER_MOD, CM_ICLKEN), .enable_bit = OMAP3430_EN_GPIO6_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "per_clkdm", .recalc = &followparent_recalc, }; @@ -2493,6 +2625,7 @@ static struct clk gpio5_ick = { .enable_reg = OMAP_CM_REGADDR(OMAP3430_PER_MOD, CM_ICLKEN), .enable_bit = OMAP3430_EN_GPIO5_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "per_clkdm", .recalc = &followparent_recalc, }; @@ -2502,6 +2635,7 @@ static struct clk gpio4_ick = { .enable_reg = OMAP_CM_REGADDR(OMAP3430_PER_MOD, CM_ICLKEN), .enable_bit = OMAP3430_EN_GPIO4_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "per_clkdm", .recalc = &followparent_recalc, }; @@ -2511,6 +2645,7 @@ static struct clk gpio3_ick = { .enable_reg = OMAP_CM_REGADDR(OMAP3430_PER_MOD, CM_ICLKEN), .enable_bit = OMAP3430_EN_GPIO3_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "per_clkdm", .recalc = &followparent_recalc, }; @@ -2520,6 +2655,7 @@ static struct clk gpio2_ick = { .enable_reg = OMAP_CM_REGADDR(OMAP3430_PER_MOD, CM_ICLKEN), .enable_bit = OMAP3430_EN_GPIO2_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "per_clkdm", .recalc = &followparent_recalc, }; @@ -2529,6 +2665,7 @@ static struct clk wdt3_ick = { .enable_reg = OMAP_CM_REGADDR(OMAP3430_PER_MOD, CM_ICLKEN), .enable_bit = OMAP3430_EN_WDT3_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "per_clkdm", .recalc = &followparent_recalc, }; @@ -2538,6 +2675,7 @@ static struct clk uart3_ick = { .enable_reg = OMAP_CM_REGADDR(OMAP3430_PER_MOD, CM_ICLKEN), .enable_bit = OMAP3430_EN_UART3_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "per_clkdm", .recalc = &followparent_recalc, }; @@ -2547,6 +2685,7 @@ static struct clk gpt9_ick = { .enable_reg = OMAP_CM_REGADDR(OMAP3430_PER_MOD, CM_ICLKEN), .enable_bit = OMAP3430_EN_GPT9_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "per_clkdm", .recalc = &followparent_recalc, }; @@ -2556,6 +2695,7 @@ static struct clk gpt8_ick = { .enable_reg = OMAP_CM_REGADDR(OMAP3430_PER_MOD, CM_ICLKEN), .enable_bit = OMAP3430_EN_GPT8_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "per_clkdm", .recalc = &followparent_recalc, }; @@ -2565,6 +2705,7 @@ static struct clk gpt7_ick = { .enable_reg = OMAP_CM_REGADDR(OMAP3430_PER_MOD, CM_ICLKEN), .enable_bit = OMAP3430_EN_GPT7_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "per_clkdm", .recalc = &followparent_recalc, }; @@ -2574,6 +2715,7 @@ static struct clk gpt6_ick = { .enable_reg = OMAP_CM_REGADDR(OMAP3430_PER_MOD, CM_ICLKEN), .enable_bit = OMAP3430_EN_GPT6_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "per_clkdm", .recalc = &followparent_recalc, }; @@ -2583,6 +2725,7 @@ static struct clk gpt5_ick = { .enable_reg = OMAP_CM_REGADDR(OMAP3430_PER_MOD, CM_ICLKEN), .enable_bit = OMAP3430_EN_GPT5_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "per_clkdm", .recalc = &followparent_recalc, }; @@ -2592,6 +2735,7 @@ static struct clk gpt4_ick = { .enable_reg = OMAP_CM_REGADDR(OMAP3430_PER_MOD, CM_ICLKEN), .enable_bit = OMAP3430_EN_GPT4_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "per_clkdm", .recalc = &followparent_recalc, }; @@ -2601,6 +2745,7 @@ static struct clk gpt3_ick = { .enable_reg = OMAP_CM_REGADDR(OMAP3430_PER_MOD, CM_ICLKEN), .enable_bit = OMAP3430_EN_GPT3_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "per_clkdm", .recalc = &followparent_recalc, }; @@ -2610,6 +2755,7 @@ static struct clk gpt2_ick = { .enable_reg = OMAP_CM_REGADDR(OMAP3430_PER_MOD, CM_ICLKEN), .enable_bit = OMAP3430_EN_GPT2_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "per_clkdm", .recalc = &followparent_recalc, }; @@ -2620,6 +2766,7 @@ static struct clk mcbsp2_ick = { .enable_reg = OMAP_CM_REGADDR(OMAP3430_PER_MOD, CM_ICLKEN), .enable_bit = OMAP3430_EN_MCBSP2_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "per_clkdm", .recalc = &followparent_recalc, }; @@ -2630,6 +2777,7 @@ static struct clk mcbsp3_ick = { .enable_reg = OMAP_CM_REGADDR(OMAP3430_PER_MOD, CM_ICLKEN), .enable_bit = OMAP3430_EN_MCBSP3_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "per_clkdm", .recalc = &followparent_recalc, }; @@ -2640,12 +2788,13 @@ static struct clk mcbsp4_ick = { .enable_reg = OMAP_CM_REGADDR(OMAP3430_PER_MOD, CM_ICLKEN), .enable_bit = OMAP3430_EN_MCBSP4_SHIFT, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "per_clkdm", .recalc = &followparent_recalc, }; static const struct clksel mcbsp_234_clksel[] = { { .parent = &per_96m_fck, .rates = common_mcbsp_96m_rates }, - { .parent = &mcbsp_clks, .rates = common_mcbsp_mcbsp_rates }, + { .parent = &mcbsp_clks, .rates = common_mcbsp_mcbsp_rates }, { .parent = NULL } }; @@ -2659,6 +2808,7 @@ static struct clk mcbsp2_fck = { .clksel_mask = OMAP2_MCBSP2_CLKS_MASK, .clksel = mcbsp_234_clksel, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "per_clkdm", .recalc = &omap2_clksel_recalc, }; @@ -2672,6 +2822,7 @@ static struct clk mcbsp3_fck = { .clksel_mask = OMAP2_MCBSP3_CLKS_MASK, .clksel = mcbsp_234_clksel, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "per_clkdm", .recalc = &omap2_clksel_recalc, }; @@ -2685,6 +2836,7 @@ static struct clk mcbsp4_fck = { .clksel_mask = OMAP2_MCBSP4_CLKS_MASK, .clksel = mcbsp_234_clksel, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "per_clkdm", .recalc = &omap2_clksel_recalc, }; @@ -2732,6 +2884,7 @@ static struct clk emu_src_ck = { .clksel_mask = OMAP3430_MUX_CTRL_MASK, .clksel = emu_src_clksel, .flags = CLOCK_IN_OMAP343X | RATE_PROPAGATES | ALWAYS_ENABLED, + .clkdm_name = "emu_clkdm", .recalc = &omap2_clksel_recalc, }; @@ -2755,6 +2908,7 @@ static struct clk pclk_fck = { .clksel_mask = OMAP3430_CLKSEL_PCLK_MASK, .clksel = pclk_emu_clksel, .flags = CLOCK_IN_OMAP343X | RATE_PROPAGATES | ALWAYS_ENABLED, + .clkdm_name = "emu_clkdm", .recalc = &omap2_clksel_recalc, }; @@ -2777,6 +2931,7 @@ static struct clk pclkx2_fck = { .clksel_mask = OMAP3430_CLKSEL_PCLKX2_MASK, .clksel = pclkx2_emu_clksel, .flags = CLOCK_IN_OMAP343X | RATE_PROPAGATES | ALWAYS_ENABLED, + .clkdm_name = "emu_clkdm", .recalc = &omap2_clksel_recalc, }; @@ -2792,6 +2947,7 @@ static struct clk atclk_fck = { .clksel_mask = OMAP3430_CLKSEL_ATCLK_MASK, .clksel = atclk_emu_clksel, .flags = CLOCK_IN_OMAP343X | RATE_PROPAGATES | ALWAYS_ENABLED, + .clkdm_name = "emu_clkdm", .recalc = &omap2_clksel_recalc, }; @@ -2802,6 +2958,7 @@ static struct clk traceclk_src_fck = { .clksel_mask = OMAP3430_TRACE_MUX_CTRL_MASK, .clksel = emu_src_clksel, .flags = CLOCK_IN_OMAP343X | RATE_PROPAGATES | ALWAYS_ENABLED, + .clkdm_name = "emu_clkdm", .recalc = &omap2_clksel_recalc, }; @@ -2824,6 +2981,7 @@ static struct clk traceclk_fck = { .clksel_mask = OMAP3430_CLKSEL_TRACECLK_MASK, .clksel = traceclk_clksel, .flags = CLOCK_IN_OMAP343X | ALWAYS_ENABLED, + .clkdm_name = "emu_clkdm", .recalc = &omap2_clksel_recalc, }; @@ -2853,11 +3011,13 @@ static struct clk sr_l4_ick = { .name = "sr_l4_ick", .parent = &l4_ick, .flags = CLOCK_IN_OMAP343X, + .clkdm_name = "core_l4_clkdm", .recalc = &followparent_recalc, }; /* SECURE_32K_FCK clocks */ +/* XXX This clock no longer exists in 3430 TRM rev F */ static struct clk gpt12_fck = { .name = "gpt12_fck", .parent = &secure_32k_fck, diff --git a/arch/arm/mach-omap2/clockdomains.h b/arch/arm/mach-omap2/clockdomains.h index a276320..cd86dcc 100644 --- a/arch/arm/mach-omap2/clockdomains.h +++ b/arch/arm/mach-omap2/clockdomains.h @@ -168,12 +168,19 @@ static struct clockdomain sgx_clkdm = { .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP3430ES2), }; +/* + * The die-to-die clockdomain was documented in the 34xx ES1 TRM, but + * then that information was removed from the 34xx ES2+ TRM. It is + * unclear whether the core is still there, but the clockdomain logic + * is there, and must be programmed to an appropriate state if the + * CORE clockdomain is to become inactive. + */ static struct clockdomain d2d_clkdm = { .name = "d2d_clkdm", .pwrdm_name = "core_pwrdm", .flags = CLKDM_CAN_HWSUP, .clktrctrl_mask = OMAP3430ES1_CLKTRCTRL_D2D_MASK, - .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP3430ES1), + .omap_chip = OMAP_CHIP_INIT(CHIP_IS_OMAP3430), }; static struct clockdomain core_l3_34xx_clkdm = { -- cgit v0.10.2 From 5955902fb5c31f6a784ddb7aa16079a2bec588f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=B6gander=20Jouni?= Date: Tue, 19 Aug 2008 11:08:45 +0300 Subject: ARM: OMAP2: Clock: Combine 34xx l3_icks and l4_icks E.g dss_l3_ick and dss_l4_ick have same gating control. Having own clock for both of them causes race condition between enable / disable. This patch combines this kind of clocks and names new clock as _ick. Signed-off-by: Jouni Hogander Acked-by: Paul Walmsley Signed-off-by: Tony Lindgren diff --git a/arch/arm/mach-omap2/clock34xx.h b/arch/arm/mach-omap2/clock34xx.h index bb01e20..c38a8a0 100644 --- a/arch/arm/mach-omap2/clock34xx.h +++ b/arch/arm/mach-omap2/clock34xx.h @@ -1189,27 +1189,34 @@ static const struct clksel gfx_l3_clksel[] = { { .parent = NULL } }; -static struct clk gfx_l3_fck = { - .name = "gfx_l3_fck", +/* Virtual parent clock for gfx_l3_ick and gfx_l3_fck */ +static struct clk gfx_l3_ck = { + .name = "gfx_l3_ck", .parent = &l3_ick, .init = &omap2_init_clksel_parent, .enable_reg = OMAP_CM_REGADDR(GFX_MOD, CM_ICLKEN), .enable_bit = OMAP_EN_GFX_SHIFT, + .flags = CLOCK_IN_OMAP3430ES1, + .recalc = &followparent_recalc, +}; + +static struct clk gfx_l3_fck = { + .name = "gfx_l3_fck", + .parent = &gfx_l3_ck, + .init = &omap2_init_clksel_parent, .clksel_reg = OMAP_CM_REGADDR(GFX_MOD, CM_CLKSEL), .clksel_mask = OMAP_CLKSEL_GFX_MASK, .clksel = gfx_l3_clksel, - .flags = CLOCK_IN_OMAP3430ES1 | RATE_PROPAGATES, + .flags = CLOCK_IN_OMAP3430ES1 | RATE_PROPAGATES | + PARENT_CONTROLS_CLOCK, .clkdm_name = "gfx_3430es1_clkdm", .recalc = &omap2_clksel_recalc, }; static struct clk gfx_l3_ick = { .name = "gfx_l3_ick", - .parent = &l3_ick, - .init = &omap2_init_clk_clkdm, - .enable_reg = OMAP_CM_REGADDR(GFX_MOD, CM_ICLKEN), - .enable_bit = OMAP_EN_GFX_SHIFT, - .flags = CLOCK_IN_OMAP3430ES1, + .parent = &gfx_l3_ck, + .flags = CLOCK_IN_OMAP3430ES1 | PARENT_CONTROLS_CLOCK, .clkdm_name = "gfx_3430es1_clkdm", .recalc = &followparent_recalc, }; @@ -2153,19 +2160,9 @@ static struct clk cam_mclk = { .recalc = &omap2_clksel_recalc, }; -static struct clk cam_l3_ick = { - .name = "cam_l3_ick", - .parent = &l3_ick, - .init = &omap2_init_clk_clkdm, - .enable_reg = OMAP_CM_REGADDR(OMAP3430_CAM_MOD, CM_ICLKEN), - .enable_bit = OMAP3430_EN_CAM_SHIFT, - .flags = CLOCK_IN_OMAP343X, - .clkdm_name = "cam_clkdm", - .recalc = &followparent_recalc, -}; - -static struct clk cam_l4_ick = { - .name = "cam_l4_ick", +static struct clk cam_ick = { + /* Handles both L3 and L4 clocks */ + .name = "cam_ick", .parent = &l4_ick, .init = &omap2_init_clk_clkdm, .enable_reg = OMAP_CM_REGADDR(OMAP3430_CAM_MOD, CM_ICLKEN), @@ -2199,19 +2196,9 @@ static struct clk usbhost_48m_fck = { .recalc = &followparent_recalc, }; -static struct clk usbhost_l3_ick = { - .name = "usbhost_l3_ick", - .parent = &l3_ick, - .init = &omap2_init_clk_clkdm, - .enable_reg = OMAP_CM_REGADDR(OMAP3430ES2_USBHOST_MOD, CM_ICLKEN), - .enable_bit = OMAP3430ES2_EN_USBHOST_SHIFT, - .flags = CLOCK_IN_OMAP3430ES2, - .clkdm_name = "usbhost_clkdm", - .recalc = &followparent_recalc, -}; - -static struct clk usbhost_l4_ick = { - .name = "usbhost_l4_ick", +static struct clk usbhost_ick = { + /* Handles both L3 and L4 clocks */ + .name = "usbhost_ick", .parent = &l4_ick, .init = &omap2_init_clk_clkdm, .enable_reg = OMAP_CM_REGADDR(OMAP3430ES2_USBHOST_MOD, CM_ICLKEN), @@ -3093,6 +3080,7 @@ static struct clk *onchip_34xx_clks[] __initdata = { &l3_ick, &l4_ick, &rm_ick, + &gfx_l3_ck, &gfx_l3_fck, &gfx_l3_ick, &gfx_cg1_ck, @@ -3174,12 +3162,10 @@ static struct clk *onchip_34xx_clks[] __initdata = { &dss2_alwon_fck, &dss_ick, &cam_mclk, - &cam_l3_ick, - &cam_l4_ick, + &cam_ick, &usbhost_120m_fck, &usbhost_48m_fck, - &usbhost_l3_ick, - &usbhost_l4_ick, + &usbhost_ick, &usbhost_sar_fck, &usim_fck, &gpt1_fck, -- cgit v0.10.2 From 0c925d79234fe77589d8ff3861f9f8bb9e7fc3f6 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Mon, 18 Aug 2008 21:49:51 -0700 Subject: rcuclassic: fix compilation NG fix: CC kernel/rcuclassic.o kernel/rcuclassic.c: In function '__rcu_process_callbacks': kernel/rcuclassic.c:561: error: 'flags' undeclared (first use in this function) kernel/rcuclassic.c:561: error: (Each undeclared identifier is reported only once kernel/rcuclassic.c:561: error: for each function it appears in.) Declare missing variable flags. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c index c6b6cf5..01e761a 100644 --- a/kernel/rcuclassic.c +++ b/kernel/rcuclassic.c @@ -557,6 +557,8 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp, local_irq_enable(); if (rcu_batch_after(rdp->batch, rcp->pending)) { + unsigned long flags; + /* and start it/schedule start if it's a new batch */ spin_lock_irqsave(&rcp->lock, flags); if (rcu_batch_after(rdp->batch, rcp->pending)) { -- cgit v0.10.2 From af4491e51632d01fbc2b856ffa9ebcd4b38db68c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 19 Aug 2008 12:33:02 +0200 Subject: sched: rt-bandwidth for user grouping interface rt_runtime is a signed value Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar diff --git a/kernel/user.c b/kernel/user.c index 865ecf57..39d6159 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -169,7 +169,7 @@ static ssize_t cpu_rt_runtime_show(struct kobject *kobj, { struct user_struct *up = container_of(kobj, struct user_struct, kobj); - return sprintf(buf, "%lu\n", sched_group_rt_runtime(up->tg)); + return sprintf(buf, "%ld\n", sched_group_rt_runtime(up->tg)); } static ssize_t cpu_rt_runtime_store(struct kobject *kobj, @@ -180,7 +180,7 @@ static ssize_t cpu_rt_runtime_store(struct kobject *kobj, unsigned long rt_runtime; int rc; - sscanf(buf, "%lu", &rt_runtime); + sscanf(buf, "%ld", &rt_runtime); rc = sched_group_set_rt_runtime(up->tg, rt_runtime); -- cgit v0.10.2 From 6f0d5c390e4206dcb3804a5072a048fdb7d2b428 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 19 Aug 2008 12:33:03 +0200 Subject: sched: rt-bandwidth accounting fix It fixes an accounting bug where we would continue accumulating runtime even though the bandwidth control is disabled. This would lead to very long throttle periods once bandwidth control gets turned on again. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 998ba54b..77340b0 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -438,9 +438,6 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq) { u64 runtime = sched_rt_runtime(rt_rq); - if (runtime == RUNTIME_INF) - return 0; - if (rt_rq->rt_throttled) return rt_rq_throttled(rt_rq); @@ -491,9 +488,11 @@ static void update_curr_rt(struct rq *rq) rt_rq = rt_rq_of_se(rt_se); spin_lock(&rt_rq->rt_runtime_lock); - rt_rq->rt_time += delta_exec; - if (sched_rt_runtime_exceeded(rt_rq)) - resched_task(curr); + if (sched_rt_runtime(rt_rq) != RUNTIME_INF) { + rt_rq->rt_time += delta_exec; + if (sched_rt_runtime_exceeded(rt_rq)) + resched_task(curr); + } spin_unlock(&rt_rq->rt_runtime_lock); } } -- cgit v0.10.2 From 0b148fa04852859972abbf848177b92daeef138a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 19 Aug 2008 12:33:04 +0200 Subject: sched: rt-bandwidth group disable fixes More extensive disable of bandwidth control. It allows sysctl_sched_rt_runtime to disable full group bandwidth control. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar diff --git a/kernel/sched.c b/kernel/sched.c index 9a1ddb8..c1bee5f 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -204,11 +204,13 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime) rt_b->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ; } +static inline int rt_bandwidth_enabled(void); + static void start_rt_bandwidth(struct rt_bandwidth *rt_b) { ktime_t now; - if (rt_b->rt_runtime == RUNTIME_INF) + if (rt_bandwidth_enabled() && rt_b->rt_runtime == RUNTIME_INF) return; if (hrtimer_active(&rt_b->rt_period_timer)) @@ -839,6 +841,11 @@ static inline u64 global_rt_runtime(void) return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC; } +static inline int rt_bandwidth_enabled(void) +{ + return sysctl_sched_rt_runtime >= 0; +} + #ifndef prepare_arch_switch # define prepare_arch_switch(next) do { } while (0) #endif diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 77340b0..94daace 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -386,7 +386,7 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) int i, idle = 1; cpumask_t span; - if (rt_b->rt_runtime == RUNTIME_INF) + if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF) return 1; span = sched_rt_period_mask(); @@ -484,6 +484,9 @@ static void update_curr_rt(struct rq *rq) curr->se.exec_start = rq->clock; cpuacct_charge(curr, delta_exec); + if (!rt_bandwidth_enabled()) + return; + for_each_sched_rt_entity(rt_se) { rt_rq = rt_rq_of_se(rt_se); -- cgit v0.10.2 From eb755805f21bd5ded84026e167b7a90887ac42e5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 19 Aug 2008 12:33:05 +0200 Subject: sched: extract walk_tg_tree() Extract walk_tg_tree() and make it a little more generic so we can use it in the schedulablity test. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar diff --git a/kernel/sched.c b/kernel/sched.c index c1bee5f..8c019a1 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1387,38 +1387,24 @@ static inline void dec_cpu_load(struct rq *rq, unsigned long load) update_load_sub(&rq->load, load); } -#ifdef CONFIG_SMP -static unsigned long source_load(int cpu, int type); -static unsigned long target_load(int cpu, int type); -static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd); - -static unsigned long cpu_avg_load_per_task(int cpu) -{ - struct rq *rq = cpu_rq(cpu); - - if (rq->nr_running) - rq->avg_load_per_task = rq->load.weight / rq->nr_running; - - return rq->avg_load_per_task; -} - -#ifdef CONFIG_FAIR_GROUP_SCHED - -typedef void (*tg_visitor)(struct task_group *, int, struct sched_domain *); +#if (defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED)) +typedef int (*tg_visitor)(struct task_group *, void *); /* * Iterate the full tree, calling @down when first entering a node and @up when * leaving it for the final time. */ -static void -walk_tg_tree(tg_visitor down, tg_visitor up, int cpu, struct sched_domain *sd) +static int walk_tg_tree(tg_visitor down, tg_visitor up, void *data) { struct task_group *parent, *child; + int ret; rcu_read_lock(); parent = &root_task_group; down: - (*down)(parent, cpu, sd); + ret = (*down)(parent, data); + if (ret) + goto out_unlock; list_for_each_entry_rcu(child, &parent->children, siblings) { parent = child; goto down; @@ -1426,14 +1412,42 @@ down: up: continue; } - (*up)(parent, cpu, sd); + ret = (*up)(parent, data); + if (ret) + goto out_unlock; child = parent; parent = parent->parent; if (parent) goto up; +out_unlock: rcu_read_unlock(); + + return ret; +} + +static int tg_nop(struct task_group *tg, void *data) +{ + return 0; } +#endif + +#ifdef CONFIG_SMP +static unsigned long source_load(int cpu, int type); +static unsigned long target_load(int cpu, int type); +static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd); + +static unsigned long cpu_avg_load_per_task(int cpu) +{ + struct rq *rq = cpu_rq(cpu); + + if (rq->nr_running) + rq->avg_load_per_task = rq->load.weight / rq->nr_running; + + return rq->avg_load_per_task; +} + +#ifdef CONFIG_FAIR_GROUP_SCHED static void __set_se_shares(struct sched_entity *se, unsigned long shares); @@ -1493,11 +1507,11 @@ __update_group_shares_cpu(struct task_group *tg, int cpu, * This needs to be done in a bottom-up fashion because the rq weight of a * parent group depends on the shares of its child groups. */ -static void -tg_shares_up(struct task_group *tg, int cpu, struct sched_domain *sd) +static int tg_shares_up(struct task_group *tg, void *data) { unsigned long rq_weight = 0; unsigned long shares = 0; + struct sched_domain *sd = data; int i; for_each_cpu_mask(i, sd->span) { @@ -1522,6 +1536,8 @@ tg_shares_up(struct task_group *tg, int cpu, struct sched_domain *sd) __update_group_shares_cpu(tg, i, shares, rq_weight); spin_unlock_irqrestore(&rq->lock, flags); } + + return 0; } /* @@ -1529,10 +1545,10 @@ tg_shares_up(struct task_group *tg, int cpu, struct sched_domain *sd) * This needs to be done in a top-down fashion because the load of a child * group is a fraction of its parents load. */ -static void -tg_load_down(struct task_group *tg, int cpu, struct sched_domain *sd) +static int tg_load_down(struct task_group *tg, void *data) { unsigned long load; + long cpu = (long)data; if (!tg->parent) { load = cpu_rq(cpu)->load.weight; @@ -1543,11 +1559,8 @@ tg_load_down(struct task_group *tg, int cpu, struct sched_domain *sd) } tg->cfs_rq[cpu]->h_load = load; -} -static void -tg_nop(struct task_group *tg, int cpu, struct sched_domain *sd) -{ + return 0; } static void update_shares(struct sched_domain *sd) @@ -1557,7 +1570,7 @@ static void update_shares(struct sched_domain *sd) if (elapsed >= (s64)(u64)sysctl_sched_shares_ratelimit) { sd->last_update = now; - walk_tg_tree(tg_nop, tg_shares_up, 0, sd); + walk_tg_tree(tg_nop, tg_shares_up, sd); } } @@ -1568,9 +1581,9 @@ static void update_shares_locked(struct rq *rq, struct sched_domain *sd) spin_lock(&rq->lock); } -static void update_h_load(int cpu) +static void update_h_load(long cpu) { - walk_tg_tree(tg_load_down, tg_nop, cpu, NULL); + walk_tg_tree(tg_load_down, tg_nop, (void *)cpu); } #else -- cgit v0.10.2 From 9a7e0b180da21885988d47558671cf580279f9d6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 19 Aug 2008 12:33:06 +0200 Subject: sched: rt-bandwidth fixes The last patch allows sysctl_sched_rt_runtime to disable bandwidth accounting for the group scheduler - however it doesn't deal with sched_setscheduler(), which will keep tasks out of groups that have no assigned runtime. If we relax this, we get into the situation where RT tasks can get into a group when we disable bandwidth control, and then starve them by enabling it again. Rework the schedulability code to check for this condition and fail to turn on bandwidth control with -EBUSY when this situation is found. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar diff --git a/kernel/sched.c b/kernel/sched.c index 8c019a1..e41bdae 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -300,9 +300,9 @@ static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp; static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity); static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp; #endif /* CONFIG_RT_GROUP_SCHED */ -#else /* !CONFIG_FAIR_GROUP_SCHED */ +#else /* !CONFIG_USER_SCHED */ #define root_task_group init_task_group -#endif /* CONFIG_FAIR_GROUP_SCHED */ +#endif /* CONFIG_USER_SCHED */ /* task_group_lock serializes add/remove of task groups and also changes to * a task group's cpu shares. @@ -1387,7 +1387,7 @@ static inline void dec_cpu_load(struct rq *rq, unsigned long load) update_load_sub(&rq->load, load); } -#if (defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED)) +#if (defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED)) || defined(SCHED_RT_GROUP_SCHED) typedef int (*tg_visitor)(struct task_group *, void *); /* @@ -5082,7 +5082,8 @@ recheck: * Do not allow realtime tasks into groups that have no runtime * assigned. */ - if (rt_policy(policy) && task_group(p)->rt_bandwidth.rt_runtime == 0) + if (rt_bandwidth_enabled() && rt_policy(policy) && + task_group(p)->rt_bandwidth.rt_runtime == 0) return -EPERM; #endif @@ -8707,73 +8708,77 @@ static DEFINE_MUTEX(rt_constraints_mutex); static unsigned long to_ratio(u64 period, u64 runtime) { if (runtime == RUNTIME_INF) - return 1ULL << 16; + return 1ULL << 20; - return div64_u64(runtime << 16, period); + return div64_u64(runtime << 20, period); } -#ifdef CONFIG_CGROUP_SCHED -static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime) +/* Must be called with tasklist_lock held */ +static inline int tg_has_rt_tasks(struct task_group *tg) { - struct task_group *tgi, *parent = tg->parent; - unsigned long total = 0; + struct task_struct *g, *p; - if (!parent) { - if (global_rt_period() < period) - return 0; + do_each_thread(g, p) { + if (rt_task(p) && rt_rq_of_se(&p->rt)->tg == tg) + return 1; + } while_each_thread(g, p); - return to_ratio(period, runtime) < - to_ratio(global_rt_period(), global_rt_runtime()); - } + return 0; +} - if (ktime_to_ns(parent->rt_bandwidth.rt_period) < period) - return 0; +struct rt_schedulable_data { + struct task_group *tg; + u64 rt_period; + u64 rt_runtime; +}; - rcu_read_lock(); - list_for_each_entry_rcu(tgi, &parent->children, siblings) { - if (tgi == tg) - continue; +static int tg_schedulable(struct task_group *tg, void *data) +{ + struct rt_schedulable_data *d = data; + struct task_group *child; + unsigned long total, sum = 0; + u64 period, runtime; + + period = ktime_to_ns(tg->rt_bandwidth.rt_period); + runtime = tg->rt_bandwidth.rt_runtime; - total += to_ratio(ktime_to_ns(tgi->rt_bandwidth.rt_period), - tgi->rt_bandwidth.rt_runtime); + if (tg == d->tg) { + period = d->rt_period; + runtime = d->rt_runtime; } - rcu_read_unlock(); - return total + to_ratio(period, runtime) <= - to_ratio(ktime_to_ns(parent->rt_bandwidth.rt_period), - parent->rt_bandwidth.rt_runtime); -} -#elif defined CONFIG_USER_SCHED -static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime) -{ - struct task_group *tgi; - unsigned long total = 0; - unsigned long global_ratio = - to_ratio(global_rt_period(), global_rt_runtime()); + if (rt_bandwidth_enabled() && !runtime && tg_has_rt_tasks(tg)) + return -EBUSY; - rcu_read_lock(); - list_for_each_entry_rcu(tgi, &task_groups, list) { - if (tgi == tg) - continue; + total = to_ratio(period, runtime); - total += to_ratio(ktime_to_ns(tgi->rt_bandwidth.rt_period), - tgi->rt_bandwidth.rt_runtime); + list_for_each_entry_rcu(child, &tg->children, siblings) { + period = ktime_to_ns(child->rt_bandwidth.rt_period); + runtime = child->rt_bandwidth.rt_runtime; + + if (child == d->tg) { + period = d->rt_period; + runtime = d->rt_runtime; + } + + sum += to_ratio(period, runtime); } - rcu_read_unlock(); - return total + to_ratio(period, runtime) < global_ratio; + if (sum > total) + return -EINVAL; + + return 0; } -#endif -/* Must be called with tasklist_lock held */ -static inline int tg_has_rt_tasks(struct task_group *tg) +static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime) { - struct task_struct *g, *p; - do_each_thread(g, p) { - if (rt_task(p) && rt_rq_of_se(&p->rt)->tg == tg) - return 1; - } while_each_thread(g, p); - return 0; + struct rt_schedulable_data data = { + .tg = tg, + .rt_period = period, + .rt_runtime = runtime, + }; + + return walk_tg_tree(tg_schedulable, tg_nop, &data); } static int tg_set_bandwidth(struct task_group *tg, @@ -8783,14 +8788,9 @@ static int tg_set_bandwidth(struct task_group *tg, mutex_lock(&rt_constraints_mutex); read_lock(&tasklist_lock); - if (rt_runtime == 0 && tg_has_rt_tasks(tg)) { - err = -EBUSY; + err = __rt_schedulable(tg, rt_period, rt_runtime); + if (err) goto unlock; - } - if (!__rt_schedulable(tg, rt_period, rt_runtime)) { - err = -EINVAL; - goto unlock; - } spin_lock_irq(&tg->rt_bandwidth.rt_runtime_lock); tg->rt_bandwidth.rt_period = ns_to_ktime(rt_period); @@ -8867,8 +8867,9 @@ static int sched_rt_global_constraints(void) rt_runtime = tg->rt_bandwidth.rt_runtime; mutex_lock(&rt_constraints_mutex); - if (!__rt_schedulable(tg, rt_period, rt_runtime)) - ret = -EINVAL; + read_lock(&tasklist_lock); + ret = __rt_schedulable(tg, rt_period, rt_runtime); + read_unlock(&tasklist_lock); mutex_unlock(&rt_constraints_mutex); return ret; -- cgit v0.10.2 From 7dc964148c2850e5df84fc17807ba48edb628c88 Mon Sep 17 00:00:00 2001 From: Ian Molton Date: Tue, 19 Aug 2008 12:14:21 +0100 Subject: [ARM] eseries: Split machine definitions This patchset breaks out the e-series machine definitions into one-per-machine. Signed-off-by: Ian Molton diff --git a/arch/arm/mach-pxa/Makefile b/arch/arm/mach-pxa/Makefile index 99ecbe7..38db581 100644 --- a/arch/arm/mach-pxa/Makefile +++ b/arch/arm/mach-pxa/Makefile @@ -38,10 +38,12 @@ obj-$(CONFIG_MACH_TOSA) += tosa.o obj-$(CONFIG_MACH_EM_X270) += em-x270.o obj-$(CONFIG_MACH_MAGICIAN) += magician.o obj-$(CONFIG_ARCH_PXA_ESERIES) += eseries.o eseries_udc.o -obj-$(CONFIG_MACH_E740) += e740_lcd.o -obj-$(CONFIG_MACH_E750) += e750_lcd.o -obj-$(CONFIG_MACH_E400) += e400_lcd.o -obj-$(CONFIG_MACH_E800) += e800_lcd.o +obj-$(CONFIG_MACH_E330) += e330.o +obj-$(CONFIG_MACH_E350) += e350.o +obj-$(CONFIG_MACH_E740) += e740.o e740_lcd.o +obj-$(CONFIG_MACH_E750) += e750.o e750_lcd.o +obj-$(CONFIG_MACH_E400) += e400.o e400_lcd.o +obj-$(CONFIG_MACH_E800) += e800.o e800_lcd.o obj-$(CONFIG_MACH_PALMTX) += palmtx.o ifeq ($(CONFIG_MACH_ZYLONITE),y) diff --git a/arch/arm/mach-pxa/e330.c b/arch/arm/mach-pxa/e330.c new file mode 100644 index 0000000..2f4555e --- /dev/null +++ b/arch/arm/mach-pxa/e330.c @@ -0,0 +1,36 @@ +/* + * Hardware definitions for the Toshiba eseries PDAs + * + * Copyright (c) 2003 Ian Molton + * + * This file is licensed under + * the terms of the GNU General Public License version 2. This program + * is licensed "as is" without any warranty of any kind, whether express + * or implied. + * + */ + +#include +#include + +#include +#include +#include + +#include +#include + +#include "generic.h" +#include "eseries.h" + +MACHINE_START(E330, "Toshiba e330") + /* Maintainer: Ian Molton (spyro@f2s.com) */ + .phys_io = 0x40000000, + .io_pg_offst = (io_p2v(0x40000000) >> 18) & 0xfffc, + .boot_params = 0xa0000100, + .map_io = pxa_map_io, + .init_irq = pxa25x_init_irq, + .fixup = eseries_fixup, + .timer = &pxa_timer, +MACHINE_END + diff --git a/arch/arm/mach-pxa/e350.c b/arch/arm/mach-pxa/e350.c new file mode 100644 index 0000000..5d58deb --- /dev/null +++ b/arch/arm/mach-pxa/e350.c @@ -0,0 +1,36 @@ +/* + * Hardware definitions for the Toshiba eseries PDAs + * + * Copyright (c) 2003 Ian Molton + * + * This file is licensed under + * the terms of the GNU General Public License version 2. This program + * is licensed "as is" without any warranty of any kind, whether express + * or implied. + * + */ + +#include +#include + +#include +#include +#include + +#include +#include + +#include "generic.h" +#include "eseries.h" + +MACHINE_START(E350, "Toshiba e350") + /* Maintainer: Ian Molton (spyro@f2s.com) */ + .phys_io = 0x40000000, + .io_pg_offst = (io_p2v(0x40000000) >> 18) & 0xfffc, + .boot_params = 0xa0000100, + .map_io = pxa_map_io, + .init_irq = pxa25x_init_irq, + .fixup = eseries_fixup, + .timer = &pxa_timer, +MACHINE_END + diff --git a/arch/arm/mach-pxa/e400.c b/arch/arm/mach-pxa/e400.c new file mode 100644 index 0000000..405e6dc --- /dev/null +++ b/arch/arm/mach-pxa/e400.c @@ -0,0 +1,60 @@ +/* + * Hardware definitions for the Toshiba eseries PDAs + * + * Copyright (c) 2003 Ian Molton + * + * This file is licensed under + * the terms of the GNU General Public License version 2. This program + * is licensed "as is" without any warranty of any kind, whether express + * or implied. + * + */ + +#include +#include + +#include +#include +#include + +#include +#include + +#include "generic.h" +#include "eseries.h" + +static unsigned long e400_pin_config[] __initdata = { + /* Chip selects */ + GPIO15_nCS_1, /* CS1 - Flash */ + GPIO80_nCS_4, /* CS4 - TMIO */ + + /* Clocks */ + GPIO12_32KHz, + + /* BTUART */ + GPIO42_BTUART_RXD, + GPIO43_BTUART_TXD, + GPIO44_BTUART_CTS, + GPIO45_GPIO, /* Used by TMIO for #SUSPEND */ + + /* wakeup */ + GPIO0_GPIO | WAKEUP_ON_EDGE_RISE, +}; + +static void __init e400_init(void) +{ + pxa2xx_mfp_config(ARRAY_AND_SIZE(e400_pin_config)); +} + +MACHINE_START(E400, "Toshiba e400") + /* Maintainer: Ian Molton (spyro@f2s.com) */ + .phys_io = 0x40000000, + .io_pg_offst = (io_p2v(0x40000000) >> 18) & 0xfffc, + .boot_params = 0xa0000100, + .map_io = pxa_map_io, + .init_irq = pxa25x_init_irq, + .fixup = eseries_fixup, + .init_machine = e400_init, + .timer = &pxa_timer, +MACHINE_END + diff --git a/arch/arm/mach-pxa/e740.c b/arch/arm/mach-pxa/e740.c new file mode 100644 index 0000000..560a8a2 --- /dev/null +++ b/arch/arm/mach-pxa/e740.c @@ -0,0 +1,81 @@ +/* + * Hardware definitions for the Toshiba eseries PDAs + * + * Copyright (c) 2003 Ian Molton + * + * This file is licensed under + * the terms of the GNU General Public License version 2. This program + * is licensed "as is" without any warranty of any kind, whether express + * or implied. + * + */ + +#include +#include + +#include +#include +#include + +#include +#include + +#include "generic.h" +#include "eseries.h" + +static unsigned long e740_pin_config[] __initdata = { + /* Chip selects */ + GPIO15_nCS_1, /* CS1 - Flash */ + GPIO79_nCS_3, /* CS3 - IMAGEON */ + GPIO80_nCS_4, /* CS4 - TMIO */ + + /* Clocks */ + GPIO12_32KHz, + + /* BTUART */ + GPIO42_BTUART_RXD, + GPIO43_BTUART_TXD, + GPIO44_BTUART_CTS, + GPIO45_GPIO, /* Used by TMIO for #SUSPEND */ + + /* PC Card */ + GPIO8_GPIO, /* CD0 */ + GPIO44_GPIO, /* CD1 */ + GPIO11_GPIO, /* IRQ0 */ + GPIO6_GPIO, /* IRQ1 */ + GPIO27_GPIO, /* RST0 */ + GPIO24_GPIO, /* RST1 */ + GPIO20_GPIO, /* PWR0 */ + GPIO23_GPIO, /* PWR1 */ + GPIO48_nPOE, + GPIO49_nPWE, + GPIO50_nPIOR, + GPIO51_nPIOW, + GPIO52_nPCE_1, + GPIO53_nPCE_2, + GPIO54_nPSKTSEL, + GPIO55_nPREG, + GPIO56_nPWAIT, + GPIO57_nIOIS16, + + /* wakeup */ + GPIO0_GPIO | WAKEUP_ON_EDGE_RISE, +}; + +static void __init e740_init(void) +{ + pxa2xx_mfp_config(ARRAY_AND_SIZE(e740_pin_config)); +} + +MACHINE_START(E740, "Toshiba e740") + /* Maintainer: Ian Molton (spyro@f2s.com) */ + .phys_io = 0x40000000, + .io_pg_offst = (io_p2v(0x40000000) >> 18) & 0xfffc, + .boot_params = 0xa0000100, + .map_io = pxa_map_io, + .init_irq = pxa25x_init_irq, + .fixup = eseries_fixup, + .init_machine = e740_init, + .timer = &pxa_timer, +MACHINE_END + diff --git a/arch/arm/mach-pxa/e750.c b/arch/arm/mach-pxa/e750.c new file mode 100644 index 0000000..bb6f814d --- /dev/null +++ b/arch/arm/mach-pxa/e750.c @@ -0,0 +1,36 @@ +/* + * Hardware definitions for the Toshiba eseries PDAs + * + * Copyright (c) 2003 Ian Molton + * + * This file is licensed under + * the terms of the GNU General Public License version 2. This program + * is licensed "as is" without any warranty of any kind, whether express + * or implied. + * + */ + +#include +#include + +#include +#include +#include + +#include +#include + +#include "generic.h" +#include "eseries.h" + +MACHINE_START(E750, "Toshiba e750") + /* Maintainer: Ian Molton (spyro@f2s.com) */ + .phys_io = 0x40000000, + .io_pg_offst = (io_p2v(0x40000000) >> 18) & 0xfffc, + .boot_params = 0xa0000100, + .map_io = pxa_map_io, + .init_irq = pxa25x_init_irq, + .fixup = eseries_fixup, + .timer = &pxa_timer, +MACHINE_END + diff --git a/arch/arm/mach-pxa/e800.c b/arch/arm/mach-pxa/e800.c new file mode 100644 index 0000000..77ebe91 --- /dev/null +++ b/arch/arm/mach-pxa/e800.c @@ -0,0 +1,36 @@ +/* + * Hardware definitions for the Toshiba eseries PDAs + * + * Copyright (c) 2003 Ian Molton + * + * This file is licensed under + * the terms of the GNU General Public License version 2. This program + * is licensed "as is" without any warranty of any kind, whether express + * or implied. + * + */ + +#include +#include + +#include +#include +#include + +#include +#include + +#include "generic.h" +#include "eseries.h" + +MACHINE_START(E800, "Toshiba e800") + /* Maintainer: Ian Molton (spyro@f2s.com) */ + .phys_io = 0x40000000, + .io_pg_offst = (io_p2v(0x40000000) >> 18) & 0xfffc, + .boot_params = 0xa0000100, + .map_io = pxa_map_io, + .init_irq = pxa25x_init_irq, + .fixup = eseries_fixup, + .timer = &pxa_timer, +MACHINE_END + diff --git a/arch/arm/mach-pxa/eseries.c b/arch/arm/mach-pxa/eseries.c index 001a252..1872faa 100644 --- a/arch/arm/mach-pxa/eseries.c +++ b/arch/arm/mach-pxa/eseries.c @@ -22,65 +22,8 @@ #include "generic.h" -static unsigned long e740_pin_config[] __initdata = { - /* Chip selects */ - GPIO15_nCS_1, /* CS1 - Flash */ - GPIO79_nCS_3, /* CS3 - IMAGEON */ - GPIO80_nCS_4, /* CS4 - TMIO */ - - /* Clocks */ - GPIO12_32KHz, - - /* BTUART */ - GPIO42_BTUART_RXD, - GPIO43_BTUART_TXD, - GPIO44_BTUART_CTS, - GPIO45_GPIO, /* Used by TMIO for #SUSPEND */ - - /* PC Card */ - GPIO8_GPIO, /* CD0 */ - GPIO44_GPIO, /* CD1 */ - GPIO11_GPIO, /* IRQ0 */ - GPIO6_GPIO, /* IRQ1 */ - GPIO27_GPIO, /* RST0 */ - GPIO24_GPIO, /* RST1 */ - GPIO20_GPIO, /* PWR0 */ - GPIO23_GPIO, /* PWR1 */ - GPIO48_nPOE, - GPIO49_nPWE, - GPIO50_nPIOR, - GPIO51_nPIOW, - GPIO52_nPCE_1, - GPIO53_nPCE_2, - GPIO54_nPSKTSEL, - GPIO55_nPREG, - GPIO56_nPWAIT, - GPIO57_nIOIS16, - - /* wakeup */ - GPIO0_GPIO | WAKEUP_ON_EDGE_RISE, -}; - -static unsigned long e400_pin_config[] __initdata = { - /* Chip selects */ - GPIO15_nCS_1, /* CS1 - Flash */ - GPIO80_nCS_4, /* CS4 - TMIO */ - - /* Clocks */ - GPIO12_32KHz, - - /* BTUART */ - GPIO42_BTUART_RXD, - GPIO43_BTUART_TXD, - GPIO44_BTUART_CTS, - GPIO45_GPIO, /* Used by TMIO for #SUSPEND */ - - /* wakeup */ - GPIO0_GPIO | WAKEUP_ON_EDGE_RISE, -}; - /* Only e800 has 128MB RAM */ -static void __init eseries_fixup(struct machine_desc *desc, +void __init eseries_fixup(struct machine_desc *desc, struct tag *tags, char **cmdline, struct meminfo *mi) { mi->nr_banks=1; @@ -92,95 +35,3 @@ static void __init eseries_fixup(struct machine_desc *desc, mi->bank[0].size = (64*1024*1024); } -static void __init e740_init(void) -{ - pxa2xx_mfp_config(ARRAY_AND_SIZE(e740_pin_config)); -} - -static void __init e400_init(void) -{ - pxa2xx_mfp_config(ARRAY_AND_SIZE(e400_pin_config)); -} - -/* e-series machine definitions */ - -#ifdef CONFIG_MACH_E330 -MACHINE_START(E330, "Toshiba e330") - /* Maintainer: Ian Molton (spyro@f2s.com) */ - .phys_io = 0x40000000, - .io_pg_offst = (io_p2v(0x40000000) >> 18) & 0xfffc, - .boot_params = 0xa0000100, - .map_io = pxa_map_io, - .init_irq = pxa25x_init_irq, - .fixup = eseries_fixup, - .timer = &pxa_timer, -MACHINE_END -#endif - -#ifdef CONFIG_MACH_E350 -MACHINE_START(E350, "Toshiba e350") - /* Maintainer: Ian Molton (spyro@f2s.com) */ - .phys_io = 0x40000000, - .io_pg_offst = (io_p2v(0x40000000) >> 18) & 0xfffc, - .boot_params = 0xa0000100, - .map_io = pxa_map_io, - .init_irq = pxa25x_init_irq, - .fixup = eseries_fixup, - .timer = &pxa_timer, -MACHINE_END -#endif - -#ifdef CONFIG_MACH_E740 -MACHINE_START(E740, "Toshiba e740") - /* Maintainer: Ian Molton (spyro@f2s.com) */ - .phys_io = 0x40000000, - .io_pg_offst = (io_p2v(0x40000000) >> 18) & 0xfffc, - .boot_params = 0xa0000100, - .map_io = pxa_map_io, - .init_irq = pxa25x_init_irq, - .fixup = eseries_fixup, - .init_machine = e740_init, - .timer = &pxa_timer, -MACHINE_END -#endif - -#ifdef CONFIG_MACH_E750 -MACHINE_START(E750, "Toshiba e750") - /* Maintainer: Ian Molton (spyro@f2s.com) */ - .phys_io = 0x40000000, - .io_pg_offst = (io_p2v(0x40000000) >> 18) & 0xfffc, - .boot_params = 0xa0000100, - .map_io = pxa_map_io, - .init_irq = pxa25x_init_irq, - .fixup = eseries_fixup, - .timer = &pxa_timer, -MACHINE_END -#endif - -#ifdef CONFIG_MACH_E400 -MACHINE_START(E400, "Toshiba e400") - /* Maintainer: Ian Molton (spyro@f2s.com) */ - .phys_io = 0x40000000, - .io_pg_offst = (io_p2v(0x40000000) >> 18) & 0xfffc, - .boot_params = 0xa0000100, - .map_io = pxa_map_io, - .init_irq = pxa25x_init_irq, - .fixup = eseries_fixup, - .init_machine = e400_init, - .timer = &pxa_timer, -MACHINE_END -#endif - -#ifdef CONFIG_MACH_E800 -MACHINE_START(E800, "Toshiba e800") - /* Maintainer: Ian Molton (spyro@f2s.com) */ - .phys_io = 0x40000000, - .io_pg_offst = (io_p2v(0x40000000) >> 18) & 0xfffc, - .boot_params = 0xa0000100, - .map_io = pxa_map_io, - .init_irq = pxa25x_init_irq, - .fixup = eseries_fixup, - .timer = &pxa_timer, -MACHINE_END -#endif - diff --git a/arch/arm/mach-pxa/eseries.h b/arch/arm/mach-pxa/eseries.h new file mode 100644 index 0000000..bc74e21 --- /dev/null +++ b/arch/arm/mach-pxa/eseries.h @@ -0,0 +1,3 @@ +void __init eseries_fixup(struct machine_desc *desc, + struct tag *tags, char **cmdline, struct meminfo *mi); + -- cgit v0.10.2 From 0ec3cf69304d2e35ad29da3aa9ab43c3dd97677c Mon Sep 17 00:00:00 2001 From: Ian Molton Date: Tue, 19 Aug 2008 13:01:28 +0100 Subject: [ARM] eseries: move LCD defs into machine files This patch removes the seperate files used for the LCD definitions on e-series and places the definitions into the machine specific files. Signed-off-by: Ian Molton diff --git a/arch/arm/mach-pxa/Makefile b/arch/arm/mach-pxa/Makefile index 38db581..b536a50 100644 --- a/arch/arm/mach-pxa/Makefile +++ b/arch/arm/mach-pxa/Makefile @@ -40,10 +40,10 @@ obj-$(CONFIG_MACH_MAGICIAN) += magician.o obj-$(CONFIG_ARCH_PXA_ESERIES) += eseries.o eseries_udc.o obj-$(CONFIG_MACH_E330) += e330.o obj-$(CONFIG_MACH_E350) += e350.o -obj-$(CONFIG_MACH_E740) += e740.o e740_lcd.o -obj-$(CONFIG_MACH_E750) += e750.o e750_lcd.o -obj-$(CONFIG_MACH_E400) += e400.o e400_lcd.o -obj-$(CONFIG_MACH_E800) += e800.o e800_lcd.o +obj-$(CONFIG_MACH_E740) += e740.o +obj-$(CONFIG_MACH_E750) += e750.o +obj-$(CONFIG_MACH_E400) += e400.o +obj-$(CONFIG_MACH_E800) += e800.o obj-$(CONFIG_MACH_PALMTX) += palmtx.o ifeq ($(CONFIG_MACH_ZYLONITE),y) diff --git a/arch/arm/mach-pxa/e400.c b/arch/arm/mach-pxa/e400.c index 405e6dc..0bcf9ec 100644 --- a/arch/arm/mach-pxa/e400.c +++ b/arch/arm/mach-pxa/e400.c @@ -17,12 +17,41 @@ #include #include +#include #include #include +#include + #include "generic.h" #include "eseries.h" +/* ------------------------ E400 LCD definitions ------------------------ */ + +static struct pxafb_mode_info e400_pxafb_mode_info = { + .pixclock = 140703, + .xres = 240, + .yres = 320, + .bpp = 16, + .hsync_len = 4, + .left_margin = 28, + .right_margin = 8, + .vsync_len = 3, + .upper_margin = 5, + .lower_margin = 6, + .sync = 0, +}; + +static struct pxafb_mach_info e400_pxafb_mach_info = { + .modes = &e400_pxafb_mode_info, + .num_modes = 1, + .lccr0 = LCCR0_Color | LCCR0_Sngl | LCCR0_Act, + .lccr3 = 0, + .pxafb_backlight_power = NULL, +}; + +/* ------------------------ E400 MFP config ----------------------------- */ + static unsigned long e400_pin_config[] __initdata = { /* Chip selects */ GPIO15_nCS_1, /* CS1 - Flash */ @@ -41,9 +70,12 @@ static unsigned long e400_pin_config[] __initdata = { GPIO0_GPIO | WAKEUP_ON_EDGE_RISE, }; +/* ---------------------------------------------------------------------- */ + static void __init e400_init(void) { pxa2xx_mfp_config(ARRAY_AND_SIZE(e400_pin_config)); + set_pxa_fb_info(&e400_pxafb_mach_info); } MACHINE_START(E400, "Toshiba e400") diff --git a/arch/arm/mach-pxa/e400_lcd.c b/arch/arm/mach-pxa/e400_lcd.c deleted file mode 100644 index 2638841..0000000 --- a/arch/arm/mach-pxa/e400_lcd.c +++ /dev/null @@ -1,56 +0,0 @@ -/* - * e400_lcd.c - * - * (c) 2005 Ian Molton - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - */ - -#include -#include -#include - -#include -#include -#include - -static struct pxafb_mode_info e400_pxafb_mode_info = { - .pixclock = 140703, - .xres = 240, - .yres = 320, - .bpp = 16, - .hsync_len = 4, - .left_margin = 28, - .right_margin = 8, - .vsync_len = 3, - .upper_margin = 5, - .lower_margin = 6, - .sync = 0, -}; - -static struct pxafb_mach_info e400_pxafb_mach_info = { - .modes = &e400_pxafb_mode_info, - .num_modes = 1, - .lccr0 = LCCR0_Color | LCCR0_Sngl | LCCR0_Act, - .lccr3 = 0, - .pxafb_backlight_power = NULL, -}; - -static int __init e400_lcd_init(void) -{ - if (!machine_is_e400()) - return -ENODEV; - - set_pxa_fb_info(&e400_pxafb_mach_info); - return 0; -} - -module_init(e400_lcd_init); - -MODULE_AUTHOR("Ian Molton "); -MODULE_DESCRIPTION("e400 lcd driver"); -MODULE_LICENSE("GPLv2"); - diff --git a/arch/arm/mach-pxa/e740.c b/arch/arm/mach-pxa/e740.c index 560a8a2..ef0c3c8 100644 --- a/arch/arm/mach-pxa/e740.c +++ b/arch/arm/mach-pxa/e740.c @@ -12,6 +12,11 @@ #include #include +#include +#include +#include + +#include